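gewobag fix: apply through the embedded Wohnungshelden form (adds FORM_ADULTS), and broaden GESOBAU detection of unavailable listings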

This commit is contained in:
Aron Petau 2026-01-05 13:40:12 +01:00
parent cc40121e46
commit 4ea437e3e6
4 changed files with 312 additions and 124 deletions

View file

@ -21,6 +21,7 @@ FORM_HAUSNUMMER=1
FORM_PLZ=10115 FORM_PLZ=10115
FORM_ORT=Berlin FORM_ORT=Berlin
FORM_PERSONS=2 FORM_PERSONS=2
FORM_ADULTS=1
FORM_CHILDREN=0 FORM_CHILDREN=0
FORM_INCOME=2500 FORM_INCOME=2500

View file

@ -91,7 +91,8 @@ All scripts deduplicate by key and timestamp.
- `FORM_PLZ` - Postal code - `FORM_PLZ` - Postal code
- `FORM_ORT` - City - `FORM_ORT` - City
- `FORM_PERSONS` - Number of persons in household - `FORM_PERSONS` - Number of persons in household
- `FORM_CHILDREN` - Number of children - `FORM_ADULTS` - Number of adults (for GEWOBAG forms, defaults to 1)
- `FORM_CHILDREN` - Number of children (defaults to 0)
- `FORM_INCOME` - Monthly net income - `FORM_INCOME` - Monthly net income
### WGcompany filters ### WGcompany filters

View file

@ -1,6 +1,7 @@
from .base_handler import BaseHandler from .base_handler import BaseHandler
import logging import logging
import asyncio import asyncio
import os
from pathlib import Path from pathlib import Path
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -22,33 +23,47 @@ class GesobauHandler(BaseHandler):
# 404 detection # 404 detection
status = response.status if response else None status = response.status if response else None
page_title = await page.title() page_title = await page.title()
if status == 404 or (page_title and "404" in page_title): page_content = await page.content()
logger.warning(f"[GESOBAU] Listing is down (404): {listing['link']}") is_404 = (
status == 404 or
(page_title and "404" in page_title) or
(page_title and "nicht gefunden" in page_title.lower()) or
("Angebot nicht mehr verfügbar" in page_content)
)
if is_404:
logger.warning(f"[GESOBAU] Listing is down (404 or unavailable): {listing['link']}")
result["success"] = False result["success"] = False
result["message"] = "Listing is no longer available (404). Application impossible. Will not retry." result["message"] = "Listing is no longer available (404). Application impossible. Will not retry."
result["deactivated"] = True result["deactivated"] = True
await page.close()
return result return result
# Always handle cookies and consent before anything else # Dismiss cookie banner
await self.handle_cookies(page)
await self.handle_consent(page)
# Save HTML after modal handling for debugging and check for deactivation
html_content = await page.content()
try: try:
with open(DATA_DIR / "gesobau_debug.html", "w", encoding="utf-8") as f: cookie_btn = await page.query_selector('#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll, button:has-text("Alle akzeptieren")')
f.write(html_content) if cookie_btn and await cookie_btn.is_visible():
except Exception as e: await cookie_btn.click()
logger.debug(f"[GESOBAU] Debug HTML not saved: {e}") logger.info("[GESOBAU] Dismissed cookie banner")
await asyncio.sleep(1)
except:
pass
# Tailored 404 detection: Angebot nicht mehr verfügbar # Save debug HTML and screenshot
if "Angebot nicht mehr verfügbar" in html_content: try:
logger.warning("[GESOBAU] Permanent fail: Angebot nicht mehr verfügbar") html_content = await page.content()
result["deactivated"] = True with open(DATA_DIR / f"gesobau_debug_{listing['id']}.html", "w", encoding="utf-8") as f:
result["message"] = "Listing is no longer available (Angebot nicht mehr verfügbar). Marked as deactivated." f.write(html_content)
await page.close() logger.info(f"[GESOBAU] Saved debug HTML: gesobau_debug_{listing['id']}.html")
return result except Exception as e:
logger.warning(f"[GESOBAU] Could not save debug HTML: {e}")
try:
await page.screenshot(path=DATA_DIR / f"gesobau_page_{listing['id']}.png", full_page=True)
logger.info(f"[GESOBAU] Saved page screenshot: gesobau_page_{listing['id']}.png")
except Exception as e:
logger.warning(f"[GESOBAU] Could not save screenshot: {e}")
# Log listing details
await self.log_listing_details(listing)
# Look for application button # Look for application button
logger.info("[GESOBAU] Searching for application button...") logger.info("[GESOBAU] Searching for application button...")
@ -56,6 +71,8 @@ class GesobauHandler(BaseHandler):
'a[href*="bewerben"]', 'a[href*="bewerben"]',
'button:has-text("Bewerben")', 'button:has-text("Bewerben")',
'a:has-text("Bewerben")', 'a:has-text("Bewerben")',
'button:has-text("Interesse")',
'a:has-text("Kontakt")',
'button.btn', 'button.btn',
] ]
@ -77,22 +94,25 @@ class GesobauHandler(BaseHandler):
if apply_btn: if apply_btn:
await apply_btn.scroll_into_view_if_needed() await apply_btn.scroll_into_view_if_needed()
await asyncio.sleep(0.5) await asyncio.sleep(0.5)
logger.info("[GESOBAU] Clicking application button...")
await apply_btn.click() await apply_btn.click()
await asyncio.sleep(2) await asyncio.sleep(2)
# --- Post-click confirmation logic ---
logger.info("[GESOBAU] Clicked application button, checking for confirmation...")
# Save screenshot and HTML after click # Save screenshot and HTML after click
logger.info("[GESOBAU] Checking for confirmation...")
try: try:
await page.screenshot(path="data/gesobau_after_apply.png") await page.screenshot(path=DATA_DIR / f"gesobau_after_apply_{listing['id']}.png", full_page=True)
logger.info("[GESOBAU] Saved screenshot after application click.") logger.info(f"[GESOBAU] Saved after-apply screenshot: gesobau_after_apply_{listing['id']}.png")
except Exception as e: except Exception as e:
logger.warning(f"[GESOBAU] Could not save screenshot: {e}") logger.warning(f"[GESOBAU] Could not save after-apply screenshot: {e}")
try: try:
html_after = await page.content() html_after = await page.content()
with open("data/gesobau_after_apply.html", "w", encoding="utf-8") as f: with open(DATA_DIR / f"gesobau_after_apply_{listing['id']}.html", "w", encoding="utf-8") as f:
f.write(html_after) f.write(html_after)
logger.info("[GESOBAU] Saved HTML after application click.") logger.info(f"[GESOBAU] Saved after-apply HTML: gesobau_after_apply_{listing['id']}.html")
except Exception as e: except Exception as e:
logger.warning(f"[GESOBAU] Could not save after-apply HTML: {e}")
logger.warning(f"[GESOBAU] Could not save HTML after apply: {e}") logger.warning(f"[GESOBAU] Could not save HTML after apply: {e}")
# Look for confirmation message on the page # Look for confirmation message on the page
@ -123,6 +143,9 @@ class GesobauHandler(BaseHandler):
else: else:
logger.warning("[GESOBAU] No application button found.") logger.warning("[GESOBAU] No application button found.")
result["message"] = "No application button found." result["message"] = "No application button found."
screenshot_path = DATA_DIR / f"gesobau_nobtn_{listing['id']}.png"
await page.screenshot(path=str(screenshot_path))
logger.info(f"[GESOBAU] Saved no-button screenshot: {screenshot_path}")
except Exception as e: except Exception as e:
result["message"] = f"Error during application: {e}" result["message"] = f"Error during application: {e}"
logger.error(f"[GESOBAU] Application error: {e}") logger.error(f"[GESOBAU] Application error: {e}")

View file

@ -1,6 +1,7 @@
from .base_handler import BaseHandler from .base_handler import BaseHandler
import logging import logging
import asyncio import asyncio
import os
from pathlib import Path from pathlib import Path
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -36,113 +37,275 @@ class GewobagHandler(BaseHandler):
result["deactivated"] = True result["deactivated"] = True
return result return result
# Always handle cookies and consent before anything else # Dismiss cookie banner
await self.handle_cookies(page)
await self.handle_consent(page)
# Save HTML after modal handling for debugging
try: try:
html_content = await page.content() cookie_btn = await page.query_selector('#CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll, button:has-text("Alle akzeptieren")')
with open(DATA_DIR / "gewobag_debug.html", "w", encoding="utf-8") as f: if cookie_btn and await cookie_btn.is_visible():
await cookie_btn.click()
logger.info("[GEWOBAG] Dismissed cookie banner")
await asyncio.sleep(1)
except:
pass
# Gewobag uses Wohnungshelden iframe directly on the page
logger.info("[GEWOBAG] Looking for Wohnungshelden iframe...")
iframe_element = await page.query_selector('iframe[src*="wohnungshelden.de"]')
if iframe_element:
iframe_url = await iframe_element.get_attribute('src')
logger.info(f"[GEWOBAG] Found Wohnungshelden iframe: {iframe_url}")
# Navigate to the iframe URL directly in a new page
iframe_page = await self.context.new_page()
try:
await iframe_page.goto(iframe_url, wait_until="networkidle")
await asyncio.sleep(2)
logger.info("[GEWOBAG] Loaded Wohnungshelden application page")
# Take screenshot
screenshot_path = DATA_DIR / f"gewobag_wohnungshelden_{listing['id']}.png"
await iframe_page.screenshot(path=str(screenshot_path), full_page=True)
logger.info("[GEWOBAG] Saved Wohnungshelden screenshot")
# Save HTML for debugging
try:
html_content = await iframe_page.content()
with open(DATA_DIR / f"gewobag_wohnungshelden_{listing['id']}.html", "w", encoding="utf-8") as f:
f.write(html_content) f.write(html_content)
except Exception as e: except Exception as e:
logger.warning(f"[GEWOBAG] Could not save debug HTML: {e}") logger.warning(f"[GEWOBAG] Could not save HTML: {e}")
# Log listing details # Fill out Wohnungshelden form
await self.log_listing_details(listing) form_filled = False
# Look for application button ("Anfrage senden") in tab or footer # Anrede (Salutation) - ng-select dropdown
logger.info("[GEWOBAG] Looking for application button...")
selectors = [
'button.rental-contact',
'button:has-text("Anfrage senden")',
'div.contact-button button',
'iframe#contact-iframe',
]
apply_btn = None
for sel in selectors:
all_btns = await page.query_selector_all(sel)
logger.info(f"[GEWOBAG] Selector '{sel}' found {len(all_btns)} matches")
for btn in all_btns:
try: try:
if await btn.is_visible(): salutation_dropdown = await iframe_page.query_selector('#salutation-dropdown, ng-select[id*="salutation"]')
apply_btn = btn if salutation_dropdown:
logger.info(f"[GEWOBAG] Found visible button with selector '{sel}'") await salutation_dropdown.click()
break
except Exception as e:
logger.warning(f"[GEWOBAG] Error checking button visibility: {e}")
if apply_btn:
break
# If not found, check for iframe (Wohnungshelden)
if not apply_btn:
iframe = await page.query_selector('iframe#contact-iframe')
if iframe:
logger.info("[GEWOBAG] Found Wohnungshelden iframe, switching context...")
frame = await iframe.content_frame()
if frame:
# Try to find a submit/apply button in the iframe
iframe_btns = await frame.query_selector_all('button, input[type="submit"]')
for btn in iframe_btns:
try:
if await btn.is_visible():
apply_btn = btn
logger.info("[GEWOBAG] Found visible button in iframe")
break
except Exception as e:
logger.warning(f"[GEWOBAG] Error checking iframe button visibility: {e}")
if apply_btn:
logger.info("[GEWOBAG] Found application button, scrolling into view...")
await apply_btn.scroll_into_view_if_needed()
await asyncio.sleep(0.5) await asyncio.sleep(0.5)
logger.info("[GEWOBAG] Clicking button...") anrede_option = await iframe_page.query_selector(f'.ng-option:has-text("{os.environ.get("FORM_ANREDE", "Herr")}")')
await apply_btn.click() if anrede_option:
await asyncio.sleep(2) await anrede_option.click()
# --- Post-click confirmation logic --- logger.info(f"[GEWOBAG] Selected Anrede: {os.environ.get('FORM_ANREDE', 'Herr')}")
logger.info("[GEWOBAG] Clicked application button, checking for confirmation...") form_filled = True
# Save screenshot and HTML after click
try:
await page.screenshot(path="data/gewobag_after_apply.png")
logger.info("[GEWOBAG] Saved screenshot after application click.")
except Exception as e: except Exception as e:
logger.warning(f"[GEWOBAG] Could not save screenshot: {e}") logger.warning(f"[GEWOBAG] Could not set Anrede: {e}")
try:
html_after = await page.content()
with open("data/gewobag_after_apply.html", "w", encoding="utf-8") as f:
f.write(html_after)
logger.info("[GEWOBAG] Saved HTML after application click.")
except Exception as e:
logger.warning(f"[GEWOBAG] Could not save HTML after apply: {e}")
# Look for confirmation message on the page # Vorname (First name)
confirmation_selectors = [
'text="Vielen Dank"',
'text="Ihre Anfrage wurde gesendet"',
'text="Bestätigung"',
'div:has-text("Vielen Dank")',
'div:has-text("Ihre Anfrage wurde gesendet")',
]
confirmed = False
for sel in confirmation_selectors:
try: try:
el = await page.query_selector(sel) vorname_field = await iframe_page.query_selector('#firstName')
if el and await el.is_visible(): if vorname_field:
logger.info(f"[GEWOBAG] Found confirmation element: {sel}") await vorname_field.fill(os.environ.get("FORM_VORNAME", ""))
confirmed = True logger.info(f"[GEWOBAG] Filled Vorname: {os.environ.get('FORM_VORNAME', '')}")
break form_filled = True
except Exception as e: except Exception as e:
logger.debug(f"[GEWOBAG] Error checking confirmation selector {sel}: {e}") logger.warning(f"[GEWOBAG] Could not fill Vorname: {e}")
if confirmed:
result["success"] = True # Nachname (Last name)
result["message"] = "Application submitted and confirmation detected." try:
else: nachname_field = await iframe_page.query_selector('#lastName')
logger.warning("[GEWOBAG] No confirmation message detected after application click.") if nachname_field:
await nachname_field.fill(os.environ.get("FORM_NACHNAME", ""))
logger.info(f"[GEWOBAG] Filled Nachname: {os.environ.get('FORM_NACHNAME', '')}")
form_filled = True
except Exception as e:
logger.warning(f"[GEWOBAG] Could not fill Nachname: {e}")
# E-Mail
try:
email_field = await iframe_page.query_selector('#email')
if email_field:
await email_field.fill(os.environ.get("FORM_EMAIL", ""))
logger.info(f"[GEWOBAG] Filled E-Mail: {os.environ.get('FORM_EMAIL', '')}")
form_filled = True
except Exception as e:
logger.warning(f"[GEWOBAG] Could not fill E-Mail: {e}")
# Telefonnummer
try:
tel_field = await iframe_page.query_selector('#phone-number, input[id*="telefonnummer"], input[id*="phone"]')
if tel_field:
await tel_field.fill(os.environ.get("FORM_PHONE", ""))
logger.info(f"[GEWOBAG] Filled Telefon: {os.environ.get('FORM_PHONE', '')}")
form_filled = True
except Exception as e:
logger.warning(f"[GEWOBAG] Could not fill Telefon: {e}")
# Straße (Street) - formcontrolname="street"
try:
strasse_field = await iframe_page.query_selector('#street, input[formcontrolname="street"]')
if strasse_field:
await strasse_field.fill(os.environ.get("FORM_STRASSE", ""))
logger.info(f"[GEWOBAG] Filled Straße: {os.environ.get('FORM_STRASSE', '')}")
form_filled = True
except Exception as e:
logger.warning(f"[GEWOBAG] Could not fill Straße: {e}")
# Hausnummer (House number) - formcontrolname="houseNumber"
try:
hausnr_field = await iframe_page.query_selector('input[formcontrolname="houseNumber"]')
if hausnr_field:
await hausnr_field.fill(os.environ.get("FORM_HAUSNUMMER", ""))
logger.info(f"[GEWOBAG] Filled Hausnummer: {os.environ.get('FORM_HAUSNUMMER', '')}")
form_filled = True
except Exception as e:
logger.warning(f"[GEWOBAG] Could not fill Hausnummer: {e}")
# PLZ (Postal code) - formcontrolname="zipCode"
try:
plz_field = await iframe_page.query_selector('input[formcontrolname="zipCode"]')
if plz_field:
await plz_field.fill(os.environ.get("FORM_PLZ", ""))
logger.info(f"[GEWOBAG] Filled PLZ: {os.environ.get('FORM_PLZ', '')}")
form_filled = True
except Exception as e:
logger.warning(f"[GEWOBAG] Could not fill PLZ: {e}")
# Ort (City) - formcontrolname="city"
try:
ort_field = await iframe_page.query_selector('input[formcontrolname="city"]')
if ort_field:
await ort_field.fill(os.environ.get("FORM_ORT", ""))
logger.info(f"[GEWOBAG] Filled Ort: {os.environ.get('FORM_ORT', '')}")
form_filled = True
except Exception as e:
logger.warning(f"[GEWOBAG] Could not fill Ort: {e}")
# Anzahl der einziehenden Erwachsenen
try:
anzahl_erwachsene = os.environ.get("FORM_ADULTS", "1")
adults_input = await iframe_page.query_selector('#formly_3_input_gewobag_anzahl_erwachsene_0')
if adults_input:
await adults_input.fill(anzahl_erwachsene)
logger.info(f"[GEWOBAG] Filled Anzahl Erwachsene: {anzahl_erwachsene}")
form_filled = True
except Exception as e:
logger.warning(f"[GEWOBAG] Could not fill Anzahl Erwachsene: {e}")
# Anzahl der einziehenden Kinder
try:
anzahl_kinder = os.environ.get("FORM_CHILDREN", "0")
children_input = await iframe_page.query_selector('#formly_3_input_gewobag_anzahl_kinder_1')
if children_input:
await children_input.fill(anzahl_kinder)
logger.info(f"[GEWOBAG] Filled Anzahl Kinder: {anzahl_kinder}")
form_filled = True
except Exception as e:
logger.warning(f"[GEWOBAG] Could not fill Anzahl Kinder: {e}")
# WBS (Wohnberechtigungsschein) - Click "Ja" radio button
try:
wbs_ja = await iframe_page.query_selector('input[type="radio"][id*="wbs_available"][id*="-Ja"]')
if wbs_ja:
await wbs_ja.click()
logger.info("[GEWOBAG] Selected WBS: Ja")
form_filled = True
except Exception as e:
logger.warning(f"[GEWOBAG] Could not select WBS: {e}")
# Privacy checkbox 1 (WBS data consent)
try:
privacy_checkbox_1 = await iframe_page.query_selector('#formly_20_checkbox_gewobag_datenschutzhinweis_iv0027_bestaetigt_0')
if privacy_checkbox_1:
await privacy_checkbox_1.check()
logger.info("[GEWOBAG] Checked privacy checkbox 1 (WBS data consent)")
form_filled = True
except Exception as e:
logger.warning(f"[GEWOBAG] Could not check privacy checkbox 1: {e}")
# Privacy checkbox 2 (Main Datenschutzbestimmungen)
try:
privacy_checkbox_2 = await iframe_page.query_selector('#formly_21_checkbox_gewobag_datenschutzhinweis_bestaetigt_0')
if privacy_checkbox_2:
await privacy_checkbox_2.check()
logger.info("[GEWOBAG] Checked privacy checkbox 2 (Datenschutzbestimmungen)")
form_filled = True
except Exception as e:
logger.warning(f"[GEWOBAG] Could not check privacy checkbox 2: {e}")
await asyncio.sleep(1)
# Screenshot after filling
screenshot_path = DATA_DIR / f"gewobag_filled_{listing['id']}.png"
await iframe_page.screenshot(path=str(screenshot_path), full_page=True)
logger.info("[GEWOBAG] Saved filled form screenshot")
# Try to submit
if form_filled:
try:
submit_selectors = [
'button[type="submit"]',
'button:has-text("Absenden")',
'button:has-text("Senden")',
'button:has-text("Anfrage")',
'.btn-primary',
]
submit_btn = None
for selector in submit_selectors:
submit_btn = await iframe_page.query_selector(selector)
if submit_btn and await submit_btn.is_visible():
logger.info(f"[GEWOBAG] Found submit button: {selector}")
break
submit_btn = None
if submit_btn:
await submit_btn.click()
logger.info("[GEWOBAG] Clicked submit button")
await asyncio.sleep(3)
# Screenshot after submission
screenshot_path = DATA_DIR / f"gewobag_submitted_{listing['id']}.png"
await iframe_page.screenshot(path=str(screenshot_path), full_page=True)
logger.info("[GEWOBAG] Saved submission screenshot")
# Check page content for errors or confirmation
page_content = await iframe_page.content()
# Check for validation errors first
if "Es wurden nicht alle Felder korrekt befüllt" in page_content or "nicht alle Felder korrekt" in page_content:
result["success"] = False result["success"] = False
result["message"] = "Clicked application button, but no confirmation detected. Check screenshot and HTML." result["message"] = "Form validation error: Not all fields filled correctly"
logger.warning("[GEWOBAG] Form validation error detected")
# Check for success confirmation
elif any(phrase in page_content for phrase in [
"Vielen Dank",
"Ihre Anfrage wurde",
"erfolgreich",
"Bestätigung",
"Danke für Ihre Bewerbung",
"Bewerbung erhalten"
]):
result["success"] = True
result["message"] = "Application submitted successfully - confirmation detected"
logger.info("[GEWOBAG] Success confirmation detected")
else: else:
result["message"] = "No application button found." # No confirmation found - mark as failed
result["success"] = False
result["message"] = "Submitted but no confirmation message found - check screenshot"
logger.warning("[GEWOBAG] No confirmation message found after submission")
else:
result["success"] = False
result["message"] = "Form filled but submit button not found"
logger.warning("[GEWOBAG] Submit button not found")
except Exception as e:
result["success"] = False
result["message"] = f"Submit error: {str(e)}"
logger.warning(f"[GEWOBAG] Submit error: {e}")
else:
result["success"] = False
result["message"] = "No form fields found in Wohnungshelden"
logger.warning("[GEWOBAG] Could not find form fields")
finally:
await iframe_page.close()
else:
result["success"] = False
result["message"] = "No Wohnungshelden iframe found"
logger.warning("[GEWOBAG] No Wohnungshelden iframe found")
screenshot_path = DATA_DIR / f"gewobag_nobtn_{listing['id']}.png"
await page.screenshot(path=str(screenshot_path))
except Exception as e: except Exception as e:
result["message"] = f"Error during application: {e}" result["message"] = f"Error during application: {e}"
logger.error(f"[GEWOBAG] Application error: {e}") logger.error(f"[GEWOBAG] Application error: {e}")