From 4ea437e3e6b65d243b9857765bb84fe9bf14be3b Mon Sep 17 00:00:00 2001 From: Aron Date: Mon, 5 Jan 2026 13:40:12 +0100 Subject: [PATCH] gewobag fix --- .env.example | 1 + README.md | 3 +- handlers/gesobau_handler.py | 77 +++++--- handlers/gewobag_handler.py | 355 ++++++++++++++++++++++++++---------- 4 files changed, 312 insertions(+), 124 deletions(-) diff --git a/.env.example b/.env.example index b404631..696dc2b 100644 --- a/.env.example +++ b/.env.example @@ -21,6 +21,7 @@ FORM_HAUSNUMMER=1 FORM_PLZ=10115 FORM_ORT=Berlin FORM_PERSONS=2 +FORM_ADULTS=1 FORM_CHILDREN=0 FORM_INCOME=2500 diff --git a/README.md b/README.md index 816fe41..edd8891 100644 --- a/README.md +++ b/README.md @@ -91,7 +91,8 @@ All scripts deduplicate by key and timestamp. - `FORM_PLZ` - Postal code - `FORM_ORT` - City - `FORM_PERSONS` - Number of persons in household -- `FORM_CHILDREN` - Number of children +- `FORM_ADULTS` - Number of adults (for GEWOBAG forms, defaults to 1) +- `FORM_CHILDREN` - Number of children (defaults to 0) - `FORM_INCOME` - Monthly net income ### WGcompany filters diff --git a/handlers/gesobau_handler.py b/handlers/gesobau_handler.py index 352a989..3beeb43 100644 --- a/handlers/gesobau_handler.py +++ b/handlers/gesobau_handler.py @@ -1,6 +1,7 @@ from .base_handler import BaseHandler import logging import asyncio +import os from pathlib import Path logger = logging.getLogger(__name__) @@ -22,33 +23,47 @@ class GesobauHandler(BaseHandler): # 404 detection status = response.status if response else None page_title = await page.title() - if status == 404 or (page_title and "404" in page_title): - logger.warning(f"[GESOBAU] Listing is down (404): {listing['link']}") + page_content = await page.content() + is_404 = ( + status == 404 or + (page_title and "404" in page_title) or + (page_title and "nicht gefunden" in page_title.lower()) or + ("Angebot nicht mehr verfügbar" in page_content) + ) + if is_404: + logger.warning(f"[GESOBAU] Listing is down (404 or unavailable): {listing['link']}") result["success"] = False result["message"] = "Listing is no longer available (404). Application impossible. Will not retry." result["deactivated"] = True - await page.close() return result - # Always handle cookies and consent before anything else - await self.handle_cookies(page) - await self.handle_consent(page) - - # Save HTML after modal handling for debugging and check for deactivation - html_content = await page.content() + # Dismiss cookie banner try: - with open(DATA_DIR / "gesobau_debug.html", "w", encoding="utf-8") as f: - f.write(html_content) - except Exception as e: - logger.debug(f"[GESOBAU] Debug HTML not saved: {e}") + cookie_btn = await page.query_selector('#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll, button:has-text("Alle akzeptieren")') + if cookie_btn and await cookie_btn.is_visible(): + await cookie_btn.click() + logger.info("[GESOBAU] Dismissed cookie banner") + await asyncio.sleep(1) + except: + pass - # Tailored 404 detection: Angebot nicht mehr verfügbar - if "Angebot nicht mehr verfügbar" in html_content: - logger.warning("[GESOBAU] Permanent fail: Angebot nicht mehr verfügbar") - result["deactivated"] = True - result["message"] = "Listing is no longer available (Angebot nicht mehr verfügbar). Marked as deactivated." - await page.close() - return result + # Save debug HTML and screenshot + try: + html_content = await page.content() + with open(DATA_DIR / f"gesobau_debug_{listing['id']}.html", "w", encoding="utf-8") as f: + f.write(html_content) + logger.info(f"[GESOBAU] Saved debug HTML: gesobau_debug_{listing['id']}.html") + except Exception as e: + logger.warning(f"[GESOBAU] Could not save debug HTML: {e}") + + try: + await page.screenshot(path=DATA_DIR / f"gesobau_page_{listing['id']}.png", full_page=True) + logger.info(f"[GESOBAU] Saved page screenshot: gesobau_page_{listing['id']}.png") + except Exception as e: + logger.warning(f"[GESOBAU] Could not save screenshot: {e}") + + # Log listing details + await self.log_listing_details(listing) # Look for application button logger.info("[GESOBAU] Searching for application button...") @@ -56,6 +71,8 @@ class GesobauHandler(BaseHandler): 'a[href*="bewerben"]', 'button:has-text("Bewerben")', 'a:has-text("Bewerben")', + 'button:has-text("Interesse")', + 'a:has-text("Kontakt")', 'button.btn', ] @@ -77,22 +94,25 @@ class GesobauHandler(BaseHandler): if apply_btn: await apply_btn.scroll_into_view_if_needed() await asyncio.sleep(0.5) + logger.info("[GESOBAU] Clicking application button...") await apply_btn.click() await asyncio.sleep(2) - # --- Post-click confirmation logic --- - logger.info("[GESOBAU] Clicked application button, checking for confirmation...") + # Save screenshot and HTML after click + logger.info("[GESOBAU] Checking for confirmation...") try: - await page.screenshot(path="data/gesobau_after_apply.png") - logger.info("[GESOBAU] Saved screenshot after application click.") + await page.screenshot(path=DATA_DIR / f"gesobau_after_apply_{listing['id']}.png", full_page=True) + logger.info(f"[GESOBAU] Saved after-apply screenshot: gesobau_after_apply_{listing['id']}.png") except Exception as e: - logger.warning(f"[GESOBAU] Could not save screenshot: {e}") + logger.warning(f"[GESOBAU] Could not save after-apply screenshot: {e}") + try: html_after = await page.content() - with open("data/gesobau_after_apply.html", "w", encoding="utf-8") as f: + with open(DATA_DIR / f"gesobau_after_apply_{listing['id']}.html", "w", encoding="utf-8") as f: f.write(html_after) - logger.info("[GESOBAU] Saved HTML after application click.") + logger.info(f"[GESOBAU] Saved after-apply HTML: gesobau_after_apply_{listing['id']}.html") except Exception as e: + logger.warning(f"[GESOBAU] Could not save after-apply HTML: {e}") logger.warning(f"[GESOBAU] Could not save HTML after apply: {e}") # Look for confirmation message on the page @@ -123,6 +143,9 @@ class GesobauHandler(BaseHandler): else: logger.warning("[GESOBAU] No application button found.") result["message"] = "No application button found." + screenshot_path = DATA_DIR / f"gesobau_nobtn_{listing['id']}.png" + await page.screenshot(path=str(screenshot_path)) + logger.info(f"[GESOBAU] Saved no-button screenshot: {screenshot_path}") except Exception as e: result["message"] = f"Error during application: {e}" logger.error(f"[GESOBAU] Application error: {e}") diff --git a/handlers/gewobag_handler.py b/handlers/gewobag_handler.py index fc78348..62f1a23 100644 --- a/handlers/gewobag_handler.py +++ b/handlers/gewobag_handler.py @@ -1,6 +1,7 @@ from .base_handler import BaseHandler import logging import asyncio +import os from pathlib import Path logger = logging.getLogger(__name__) @@ -36,113 +37,275 @@ class GewobagHandler(BaseHandler): result["deactivated"] = True return result - # Always handle cookies and consent before anything else - await self.handle_cookies(page) - await self.handle_consent(page) - - # Save HTML after modal handling for debugging + # Dismiss cookie banner try: - html_content = await page.content() - with open(DATA_DIR / "gewobag_debug.html", "w", encoding="utf-8") as f: - f.write(html_content) - except Exception as e: - logger.warning(f"[GEWOBAG] Could not save debug HTML: {e}") + cookie_btn = await page.query_selector('#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll, button:has-text("Alle akzeptieren")') + if cookie_btn and await cookie_btn.is_visible(): + await cookie_btn.click() + logger.info("[GEWOBAG] Dismissed cookie banner") + await asyncio.sleep(1) + except: + pass - # Log listing details - await self.log_listing_details(listing) + # Gewobag uses Wohnungshelden iframe directly on the page + logger.info("[GEWOBAG] Looking for Wohnungshelden iframe...") + iframe_element = await page.query_selector('iframe[src*="wohnungshelden.de"]') - # Look for application button ("Anfrage senden") in tab or footer - logger.info("[GEWOBAG] Looking for application button...") - selectors = [ - 'button.rental-contact', - 'button:has-text("Anfrage senden")', - 'div.contact-button button', - 'iframe#contact-iframe', - ] + if iframe_element: + iframe_url = await iframe_element.get_attribute('src') + logger.info(f"[GEWOBAG] Found Wohnungshelden iframe: {iframe_url}") - apply_btn = None - for sel in selectors: - all_btns = await page.query_selector_all(sel) - logger.info(f"[GEWOBAG] Selector '{sel}' found {len(all_btns)} matches") - for btn in all_btns: + # Navigate to the iframe URL directly in a new page + iframe_page = await self.context.new_page() + try: + await iframe_page.goto(iframe_url, wait_until="networkidle") + await asyncio.sleep(2) + logger.info("[GEWOBAG] Loaded Wohnungshelden application page") + + # Take screenshot + screenshot_path = DATA_DIR / f"gewobag_wohnungshelden_{listing['id']}.png" + await iframe_page.screenshot(path=str(screenshot_path), full_page=True) + logger.info("[GEWOBAG] Saved Wohnungshelden screenshot") + + # Save HTML for debugging try: - if await btn.is_visible(): - apply_btn = btn - logger.info(f"[GEWOBAG] Found visible button with selector '{sel}'") - break + html_content = await iframe_page.content() + with open(DATA_DIR / f"gewobag_wohnungshelden_{listing['id']}.html", "w", encoding="utf-8") as f: + f.write(html_content) except Exception as e: - logger.warning(f"[GEWOBAG] Error checking button visibility: {e}") - if apply_btn: - break + logger.warning(f"[GEWOBAG] Could not save HTML: {e}") - # If not found, check for iframe (Wohnungshelden) - if not apply_btn: - iframe = await page.query_selector('iframe#contact-iframe') - if iframe: - logger.info("[GEWOBAG] Found Wohnungshelden iframe, switching context...") - frame = await iframe.content_frame() - if frame: - # Try to find a submit/apply button in the iframe - iframe_btns = await frame.query_selector_all('button, input[type="submit"]') - for btn in iframe_btns: - try: - if await btn.is_visible(): - apply_btn = btn - logger.info("[GEWOBAG] Found visible button in iframe") + # Fill out Wohnungshelden form + form_filled = False + + # Anrede (Salutation) - ng-select dropdown + try: + salutation_dropdown = await iframe_page.query_selector('#salutation-dropdown, ng-select[id*="salutation"]') + if salutation_dropdown: + await salutation_dropdown.click() + await asyncio.sleep(0.5) + anrede_option = await iframe_page.query_selector(f'.ng-option:has-text("{os.environ.get("FORM_ANREDE", "Herr")}")') + if anrede_option: + await anrede_option.click() + logger.info(f"[GEWOBAG] Selected Anrede: {os.environ.get('FORM_ANREDE', 'Herr')}") + form_filled = True + except Exception as e: + logger.warning(f"[GEWOBAG] Could not set Anrede: {e}") + + # Vorname (First name) + try: + vorname_field = await iframe_page.query_selector('#firstName') + if vorname_field: + await vorname_field.fill(os.environ.get("FORM_VORNAME", "")) + logger.info(f"[GEWOBAG] Filled Vorname: {os.environ.get('FORM_VORNAME', '')}") + form_filled = True + except Exception as e: + logger.warning(f"[GEWOBAG] Could not fill Vorname: {e}") + + # Nachname (Last name) + try: + nachname_field = await iframe_page.query_selector('#lastName') + if nachname_field: + await nachname_field.fill(os.environ.get("FORM_NACHNAME", "")) + logger.info(f"[GEWOBAG] Filled Nachname: {os.environ.get('FORM_NACHNAME', '')}") + form_filled = True + except Exception as e: + logger.warning(f"[GEWOBAG] Could not fill Nachname: {e}") + + # E-Mail + try: + email_field = await iframe_page.query_selector('#email') + if email_field: + await email_field.fill(os.environ.get("FORM_EMAIL", "")) + logger.info(f"[GEWOBAG] Filled E-Mail: {os.environ.get('FORM_EMAIL', '')}") + form_filled = True + except Exception as e: + logger.warning(f"[GEWOBAG] Could not fill E-Mail: {e}") + + # Telefonnummer + try: + tel_field = await iframe_page.query_selector('#phone-number, input[id*="telefonnummer"], input[id*="phone"]') + if tel_field: + await tel_field.fill(os.environ.get("FORM_PHONE", "")) + logger.info(f"[GEWOBAG] Filled Telefon: {os.environ.get('FORM_PHONE', '')}") + form_filled = True + except Exception as e: + logger.warning(f"[GEWOBAG] Could not fill Telefon: {e}") + + # Straße (Street) - formcontrolname="street" + try: + strasse_field = await iframe_page.query_selector('#street, input[formcontrolname="street"]') + if strasse_field: + await strasse_field.fill(os.environ.get("FORM_STRASSE", "")) + logger.info(f"[GEWOBAG] Filled Straße: {os.environ.get('FORM_STRASSE', '')}") + form_filled = True + except Exception as e: + logger.warning(f"[GEWOBAG] Could not fill Straße: {e}") + + # Hausnummer (House number) - formcontrolname="houseNumber" + try: + hausnr_field = await iframe_page.query_selector('input[formcontrolname="houseNumber"]') + if hausnr_field: + await hausnr_field.fill(os.environ.get("FORM_HAUSNUMMER", "")) + logger.info(f"[GEWOBAG] Filled Hausnummer: {os.environ.get('FORM_HAUSNUMMER', '')}") + form_filled = True + except Exception as e: + logger.warning(f"[GEWOBAG] Could not fill Hausnummer: {e}") + + # PLZ (Postal code) - formcontrolname="zipCode" + try: + plz_field = await iframe_page.query_selector('input[formcontrolname="zipCode"]') + if plz_field: + await plz_field.fill(os.environ.get("FORM_PLZ", "")) + logger.info(f"[GEWOBAG] Filled PLZ: {os.environ.get('FORM_PLZ', '')}") + form_filled = True + except Exception as e: + logger.warning(f"[GEWOBAG] Could not fill PLZ: {e}") + + # Ort (City) - formcontrolname="city" + try: + ort_field = await iframe_page.query_selector('input[formcontrolname="city"]') + if ort_field: + await ort_field.fill(os.environ.get("FORM_ORT", "")) + logger.info(f"[GEWOBAG] Filled Ort: {os.environ.get('FORM_ORT', '')}") + form_filled = True + except Exception as e: + logger.warning(f"[GEWOBAG] Could not fill Ort: {e}") + + # Anzahl der einziehenden Erwachsenen + try: + anzahl_erwachsene = os.environ.get("FORM_ADULTS", "1") + adults_input = await iframe_page.query_selector('#formly_3_input_gewobag_anzahl_erwachsene_0') + if adults_input: + await adults_input.fill(anzahl_erwachsene) + logger.info(f"[GEWOBAG] Filled Anzahl Erwachsene: {anzahl_erwachsene}") + form_filled = True + except Exception as e: + logger.warning(f"[GEWOBAG] Could not fill Anzahl Erwachsene: {e}") + + # Anzahl der einziehenden Kinder + try: + anzahl_kinder = os.environ.get("FORM_CHILDREN", "0") + children_input = await iframe_page.query_selector('#formly_3_input_gewobag_anzahl_kinder_1') + if children_input: + await children_input.fill(anzahl_kinder) + logger.info(f"[GEWOBAG] Filled Anzahl Kinder: {anzahl_kinder}") + form_filled = True + except Exception as e: + logger.warning(f"[GEWOBAG] Could not fill Anzahl Kinder: {e}") + + # WBS (Wohnberechtigungsschein) - Click "Ja" radio button + try: + wbs_ja = await iframe_page.query_selector('input[type="radio"][id*="wbs_available"][id*="-Ja"]') + if wbs_ja: + await wbs_ja.click() + logger.info("[GEWOBAG] Selected WBS: Ja") + form_filled = True + except Exception as e: + logger.warning(f"[GEWOBAG] Could not select WBS: {e}") + + # Privacy checkbox 1 (WBS data consent) + try: + privacy_checkbox_1 = await iframe_page.query_selector('#formly_20_checkbox_gewobag_datenschutzhinweis_iv0027_bestaetigt_0') + if privacy_checkbox_1: + await privacy_checkbox_1.check() + logger.info("[GEWOBAG] Checked privacy checkbox 1 (WBS data consent)") + form_filled = True + except Exception as e: + logger.warning(f"[GEWOBAG] Could not check privacy checkbox 1: {e}") + + # Privacy checkbox 2 (Main Datenschutzbestimmungen) + try: + privacy_checkbox_2 = await iframe_page.query_selector('#formly_21_checkbox_gewobag_datenschutzhinweis_bestaetigt_0') + if privacy_checkbox_2: + await privacy_checkbox_2.check() + logger.info("[GEWOBAG] Checked privacy checkbox 2 (Datenschutzbestimmungen)") + form_filled = True + except Exception as e: + logger.warning(f"[GEWOBAG] Could not check privacy checkbox 2: {e}") + + await asyncio.sleep(1) + + # Screenshot after filling + screenshot_path = DATA_DIR / f"gewobag_filled_{listing['id']}.png" + await iframe_page.screenshot(path=str(screenshot_path), full_page=True) + logger.info("[GEWOBAG] Saved filled form screenshot") + + # Try to submit + if form_filled: + try: + submit_selectors = [ + 'button[type="submit"]', + 'button:has-text("Absenden")', + 'button:has-text("Senden")', + 'button:has-text("Anfrage")', + '.btn-primary', + ] + + submit_btn = None + for selector in submit_selectors: + submit_btn = await iframe_page.query_selector(selector) + if submit_btn and await submit_btn.is_visible(): + logger.info(f"[GEWOBAG] Found submit button: {selector}") break - except Exception as e: - logger.warning(f"[GEWOBAG] Error checking iframe button visibility: {e}") + submit_btn = None - if apply_btn: - logger.info("[GEWOBAG] Found application button, scrolling into view...") - await apply_btn.scroll_into_view_if_needed() - await asyncio.sleep(0.5) - logger.info("[GEWOBAG] Clicking button...") - await apply_btn.click() - await asyncio.sleep(2) - # --- Post-click confirmation logic --- - logger.info("[GEWOBAG] Clicked application button, checking for confirmation...") - # Save screenshot and HTML after click - try: - await page.screenshot(path="data/gewobag_after_apply.png") - logger.info("[GEWOBAG] Saved screenshot after application click.") - except Exception as e: - logger.warning(f"[GEWOBAG] Could not save screenshot: {e}") - try: - html_after = await page.content() - with open("data/gewobag_after_apply.html", "w", encoding="utf-8") as f: - f.write(html_after) - logger.info("[GEWOBAG] Saved HTML after application click.") - except Exception as e: - logger.warning(f"[GEWOBAG] Could not save HTML after apply: {e}") + if submit_btn: + await submit_btn.click() + logger.info("[GEWOBAG] Clicked submit button") + await asyncio.sleep(3) - # Look for confirmation message on the page - confirmation_selectors = [ - 'text="Vielen Dank"', - 'text="Ihre Anfrage wurde gesendet"', - 'text="Bestätigung"', - 'div:has-text("Vielen Dank")', - 'div:has-text("Ihre Anfrage wurde gesendet")', - ] - confirmed = False - for sel in confirmation_selectors: - try: - el = await page.query_selector(sel) - if el and await el.is_visible(): - logger.info(f"[GEWOBAG] Found confirmation element: {sel}") - confirmed = True - break - except Exception as e: - logger.debug(f"[GEWOBAG] Error checking confirmation selector {sel}: {e}") - if confirmed: - result["success"] = True - result["message"] = "Application submitted and confirmation detected." - else: - logger.warning("[GEWOBAG] No confirmation message detected after application click.") - result["success"] = False - result["message"] = "Clicked application button, but no confirmation detected. Check screenshot and HTML." + # Screenshot after submission + screenshot_path = DATA_DIR / f"gewobag_submitted_{listing['id']}.png" + await iframe_page.screenshot(path=str(screenshot_path), full_page=True) + logger.info("[GEWOBAG] Saved submission screenshot") + + # Check page content for errors or confirmation + page_content = await iframe_page.content() + + # Check for validation errors first + if "Es wurden nicht alle Felder korrekt befüllt" in page_content or "nicht alle Felder korrekt" in page_content: + result["success"] = False + result["message"] = "Form validation error: Not all fields filled correctly" + logger.warning("[GEWOBAG] Form validation error detected") + # Check for success confirmation + elif any(phrase in page_content for phrase in [ + "Vielen Dank", + "Ihre Anfrage wurde", + "erfolgreich", + "Bestätigung", + "Danke für Ihre Bewerbung", + "Bewerbung erhalten" + ]): + result["success"] = True + result["message"] = "Application submitted successfully - confirmation detected" + logger.info("[GEWOBAG] Success confirmation detected") + else: + # No confirmation found - mark as failed + result["success"] = False + result["message"] = "Submitted but no confirmation message found - check screenshot" + logger.warning("[GEWOBAG] No confirmation message found after submission") + else: + result["success"] = False + result["message"] = "Form filled but submit button not found" + logger.warning("[GEWOBAG] Submit button not found") + except Exception as e: + result["success"] = False + result["message"] = f"Submit error: {str(e)}" + logger.warning(f"[GEWOBAG] Submit error: {e}") + else: + result["success"] = False + result["message"] = "No form fields found in Wohnungshelden" + logger.warning("[GEWOBAG] Could not find form fields") + finally: + await iframe_page.close() else: - result["message"] = "No application button found." + result["success"] = False + result["message"] = "No Wohnungshelden iframe found" + logger.warning("[GEWOBAG] No Wohnungshelden iframe found") + screenshot_path = DATA_DIR / f"gewobag_nobtn_{listing['id']}.png" + await page.screenshot(path=str(screenshot_path)) + except Exception as e: result["message"] = f"Error during application: {e}" logger.error(f"[GEWOBAG] Application error: {e}")