diff --git a/.env.example b/.env.example index 406a77c..b404631 100644 --- a/.env.example +++ b/.env.example @@ -3,9 +3,31 @@ TELEGRAM_BOT_TOKEN=your_bot_token_here TELEGRAM_CHAT_ID=your_chat_id_here -# inberlinwohnen.de Login -INBERLIN_EMAIL=aron@petau.net -INBERLIN_PASSWORD=BvA5n0iKmGV1 +# inberlinwohnen.de Login (optional - for personalized filtered results) +INBERLIN_EMAIL=your_email@example.com +INBERLIN_PASSWORD=your_password_here -# Check interval in seconds (default: 600 = 10 minutes) -CHECK_INTERVAL=600 +# Check interval in seconds (default: 300 = 5 minutes) +CHECK_INTERVAL=300 + +# Form Data for Autopilot Applications +FORM_ANREDE=Herr +FORM_VORNAME=Max +FORM_NACHNAME=Mustermann +FORM_EMAIL=max@example.com +FORM_PHONE=030123456789 +FORM_STRASSE=Musterstraße +FORM_HAUSNUMMER=1 +FORM_PLZ=10115 +FORM_ORT=Berlin +FORM_PERSONS=2 +FORM_CHILDREN=0 +FORM_INCOME=2500 + +# WGcompany.de Search Filters (optional) +WGCOMPANY_ENABLED=true +WGCOMPANY_MIN_SIZE= +WGCOMPANY_MAX_SIZE= +WGCOMPANY_MIN_PRICE= +WGCOMPANY_MAX_PRICE= +WGCOMPANY_BEZIRK=0 diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..b6c4dd2 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,81 @@ +# Copilot Instructions for inberlin-monitor + +## Project Overview + +A Python-based apartment monitoring bot for Berlin's public housing portal (inberlinwohnen.de) and WG rooms (wgcompany.de). Monitors listings from 6 housing companies (HOWOGE, Gewobag, Degewo, Gesobau, Stadt und Land, WBM) plus WGcompany, and sends Telegram notifications with optional auto-application via Playwright browser automation. 
+ +## Architecture + +**Single-file monolith** (`monitor.py`, ~1600 lines) with five main components: +- `InBerlinMonitor` - Core scraping/monitoring loop for inberlinwohnen.de, login handling, listing detection +- `WGCompanyMonitor` - Monitors wgcompany.de WG rooms with configurable search filters +- `ApplicationHandler` - Company-specific form automation (each `_apply_*` method handles one housing company) +- `TelegramBot` - Command handling via long-polling in a daemon thread +- Main loop runs synchronously with `asyncio.get_event_loop().run_until_complete()` for Playwright calls + +**Data flow**: Fetch listings → Compare with `listings.json` / `wgcompany_listings.json` → Detect new → Log to CSV → Auto-apply if autopilot enabled (inberlin only) → Save to `applications.json` → Send Telegram notification + +## Key Patterns + +### Company-specific handlers +Each housing company has a dedicated `_apply_{company}()` method in `ApplicationHandler`. When adding support for a new company: +1. Add detection in `_detect_company()` (line ~350) +2. Add handler call in `apply()` switch (line ~330) +3. Implement `_apply_newcompany()` following existing patterns (cookie dismiss → find button → fill form → submit → screenshot) + +### Listing identification +Listings are hashed by `md5(key_fields)[:12]` to generate stable IDs: +- InBerlin: `md5(rooms+size+price+address)` +- WGcompany: `md5(link+price+size)` + +### State management +- `state.json` - Runtime state (autopilot toggle) +- `listings.json` - Previously seen inberlinwohnen listings +- `wgcompany_listings.json` - Previously seen WGcompany listings +- `applications.json` - Application history with success/failure status +- `listing_times.csv` / `wgcompany_times.csv` - Time-series data for pattern analysis + +## Development + +### Run locally +```bash +# Install deps (requires Playwright) +pip install -r requirements.txt +playwright install chromium + +# Set env vars and run +export TELEGRAM_BOT_TOKEN=... TELEGRAM_CHAT_ID=... 
+python monitor.py +``` + +### Docker (production) +```bash +cp .env.example .env # Configure credentials +docker compose up -d +docker compose logs -f +``` + +### Debugging +- Screenshots saved to `data/` on application failures (`*_nobtn_*.png`) +- HTML saved to `data/debug_page.html` (inberlin) and `data/wgcompany_debug.html` +- Full logs in `data/monitor.log` + +## Environment Variables + +Required: `TELEGRAM_BOT_TOKEN`, `TELEGRAM_CHAT_ID` +InBerlin login: `INBERLIN_EMAIL`, `INBERLIN_PASSWORD` +Form data: `FORM_ANREDE`, `FORM_VORNAME`, `FORM_NACHNAME`, `FORM_EMAIL`, `FORM_PHONE`, `FORM_STRASSE`, `FORM_HAUSNUMMER`, `FORM_PLZ`, `FORM_ORT`, `FORM_PERSONS`, `FORM_CHILDREN`, `FORM_INCOME` +WGcompany: `WGCOMPANY_ENABLED`, `WGCOMPANY_MIN_SIZE`, `WGCOMPANY_MAX_SIZE`, `WGCOMPANY_MIN_PRICE`, `WGCOMPANY_MAX_PRICE`, `WGCOMPANY_BEZIRK` + +## Common Tasks + +### Fix a broken company handler +Check `data/*_nobtn_*.png` screenshots and `data/debug_page.html` to see actual page structure. Update selectors in the corresponding `_apply_{company}()` method. + +### Add Telegram command +1. Add case in `TelegramBot._handle_update()` (line ~95) +2. Implement `_handle_{command}_command()` method + +### Modify listing extraction +- InBerlin: Update regex patterns in `InBerlinMonitor.fetch_listings()`. Test against `data/debug_page.html`. +- WGcompany: Update parsing in `WGCompanyMonitor.fetch_listings()`. Test against `data/wgcompany_debug.html`. 
diff --git a/.gitignore b/.gitignore index e94e366..0017322 100644 --- a/.gitignore +++ b/.gitignore @@ -4,10 +4,12 @@ __pycache__/ *.class *.so .Python +.venv .venv/ venv/ ENV/ env/ +.python-version # Data data/ @@ -24,3 +26,8 @@ data/ # OS .DS_Store Thumbs.db + +# Debug files (these should be in data/ but just in case) +debug_page.html +*.html +!README.md diff --git a/.python-version b/.python-version index b6d8b76..24ee5b1 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.11.8 +3.13 diff --git a/BOTFATHER_COMMANDS.txt b/BOTFATHER_COMMANDS.txt new file mode 100644 index 0000000..3fef6a9 --- /dev/null +++ b/BOTFATHER_COMMANDS.txt @@ -0,0 +1,6 @@ +Copy this to BotFather when setting commands with /setcommands: + +autopilot - Toggle automatic applications (on/off) +status - Show current status and stats +plot - Show weekly listing patterns +help - Show available commands diff --git a/Dockerfile b/Dockerfile index 5542e12..d0e2903 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/playwright/python:v1.56.0-jammy +FROM mcr.microsoft.com/playwright/python:v1.57.0-jammy WORKDIR /app diff --git a/README.md b/README.md index 9799f5a..a56500e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # inberlin-monitor -Monitors [inberlinwohnen.de](https://www.inberlinwohnen.de/wohnungsfinder/) for new apartment listings and sends Telegram notifications. Supports automatic application submission via autopilot mode. +Monitors [inberlinwohnen.de](https://www.inberlinwohnen.de/wohnungsfinder/) and [wgcompany.de](http://www.wgcompany.de) for new apartment/WG listings and sends Telegram notifications. Supports automatic application submission via autopilot mode. 
## Features @@ -9,10 +9,22 @@ Monitors [inberlinwohnen.de](https://www.inberlinwohnen.de/wohnungsfinder/) for - 📱 Sends Telegram notifications for new listings with clickable links - 🤖 **Autopilot mode**: Automatically applies to new listings - 📊 **/plot command**: Visualize when listings appear throughout the week -- 🏢 Supports multiple housing companies: HOWOGE, Gewobag, Degewo, Gesobau, Stadt und Land, WBM +- 🏢 Supports 6 housing companies: HOWOGE, Gewobag, Degewo, Gesobau, Stadt und Land, WBM +- 🏠 **WGcompany.de**: Also monitors WG room listings with configurable search filters - 💾 Persists state to detect only truly new listings - 📈 Logs listing times for pattern analysis +## Supported Housing Companies + +| Company | Auto-Apply | Notes | +|---------|------------|-------| +| HOWOGE | ✅ | Direct form submission | +| Gewobag | ✅ | Direct form submission | +| Degewo | ✅ | Via Wohnungshelden portal | +| Gesobau | ✅ | Direct form submission | +| Stadt und Land | ✅ | Direct form submission | +| WBM | ✅ | Direct form submission | + ## Setup ### 1. 
Create Telegram Bot @@ -80,13 +92,28 @@ cat data/monitor.log | Variable | Description | Default | |----------|-------------|---------| | `FORM_ANREDE` | Salutation (Herr/Frau) | Herr | -| `FORM_VORNAME` | First name | Aron | -| `FORM_NACHNAME` | Last name | Petau | -| `FORM_EMAIL` | Contact email | `aron@petau.net` | -| `FORM_PHONE` | Phone number | 017695773688 | +| `FORM_VORNAME` | First name | - | +| `FORM_NACHNAME` | Last name | - | +| `FORM_EMAIL` | Contact email | - | +| `FORM_PHONE` | Phone number | - | +| `FORM_STRASSE` | Street name | - | +| `FORM_HAUSNUMMER` | House number | - | +| `FORM_PLZ` | Postal code | - | +| `FORM_ORT` | City | Berlin | | `FORM_PERSONS` | Number of persons moving in | 1 | | `FORM_CHILDREN` | Number of children | 0 | -| `FORM_INCOME` | Monthly household net income (€) | 1600 | +| `FORM_INCOME` | Monthly household net income (€) | - | + +### Optional - WGcompany.de Search Filters + +| Variable | Description | Default | +|----------|-------------|---------| +| `WGCOMPANY_ENABLED` | Enable WGcompany monitoring | true | +| `WGCOMPANY_MIN_SIZE` | Minimum room size (m²) | - | +| `WGCOMPANY_MAX_SIZE` | Maximum room size (m²) | - | +| `WGCOMPANY_MIN_PRICE` | Minimum rent (€) | - | +| `WGCOMPANY_MAX_PRICE` | Maximum rent (€) | - | +| `WGCOMPANY_BEZIRK` | District code (0=all) | 0 | ## Without Login @@ -98,10 +125,13 @@ All data is stored in the `./data` directory: | File | Description | |------|-------------| -| `listings.json` | Known listings (for duplicate detection) | +| `listings.json` | Known inberlinwohnen listings | +| `wgcompany_listings.json` | Known WGcompany listings | | `state.json` | Monitor state (autopilot on/off) | | `applications.json` | Record of submitted applications | -| `listing_times.csv` | Timing data for pattern analysis | +| `listing_times.csv` | InBerlin timing data for pattern analysis | +| `wgcompany_times.csv` | WGcompany timing data | | `monitor.log` | Application logs | | `weekly_plot.png` | Generated plot 
from /plot command | +| `wgcompany_debug.html` | Debug HTML from WGcompany | | `*.png` | Screenshots from application attempts | diff --git a/bot_logo.png b/bot_logo.png new file mode 100644 index 0000000..b408f53 Binary files /dev/null and b/bot_logo.png differ diff --git a/debug_page.html b/debug_page.html deleted file mode 100644 index 933dcc9..0000000 --- a/debug_page.html +++ /dev/null @@ -1,3399 +0,0 @@ - - - - - - - - - - - - - - - - - - - -
-
-
- - - - -
-
- -
-
- -
- - - - -
- -
-
- - -
- - -
-
- -
- -
-
-
- -
-
- -
- -
-
-
-
-
-
- - -
-
-
-
- - -
-
- - -
-
-
-
- - -
-
- - Loading... -
- -
- Wir haben 54 Wohnungen für Sie gefunden -
- -
- - - -
-
- - - - Sortierung/Filter: - - -
-
- - - -
- - -
-
-
- - - -
- - -
- - -
-
- - - - -
- Keine Netzwekverbindung. Sie sind offline. -
- - - - - - -
-
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- - -
- - -
-
- - - - - - -
-
- - - - - - -
-
- - - - - - -
-
- - - - - - -
-
- - - - - - -
-
- - - - - - -
-
- - - - - - -
-
- - - - - - -
-
- - - - - - -
-
- - - - - - -
- -
- - - - - - - - -
- -

Änderungen/Irrtümer vorbehalten.
Die genauen Wohnungsdaten entnehmen Sie bitte den jeweiligen Exposés der Wohnungsunternehmen (per Klick auf »Alle Details«). -

-
-
- -
- -
-
-
- - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 9fec014..405e11d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,23 +3,7 @@ services: build: . container_name: inberlin-monitor restart: unless-stopped - environment: - # Telegram notifications - - TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN} - - TELEGRAM_CHAT_ID=${TELEGRAM_CHAT_ID} - # inberlinwohnen.de login - - INBERLIN_EMAIL=${INBERLIN_EMAIL} - - INBERLIN_PASSWORD=${INBERLIN_PASSWORD} - # Check interval in seconds (default: 300 = 5 minutes) - - CHECK_INTERVAL=${CHECK_INTERVAL:-300} - # Form data for applications - - FORM_ANREDE=${FORM_ANREDE:-Herr} - - FORM_VORNAME=${FORM_VORNAME:-Aron} - - FORM_NACHNAME=${FORM_NACHNAME:-Petau} - - FORM_EMAIL=${FORM_EMAIL:-aron@petau.net} - - FORM_PHONE=${FORM_PHONE:-017695773688} - - FORM_PERSONS=${FORM_PERSONS:-1} - - FORM_CHILDREN=${FORM_CHILDREN:-0} - - FORM_INCOME=${FORM_INCOME:-1600} + env_file: + - .env volumes: - ./data:/data diff --git a/monitor.py b/monitor.py index d2201b9..cd8dc4f 100644 --- a/monitor.py +++ b/monitor.py @@ -25,15 +25,29 @@ INBERLIN_EMAIL = os.environ.get("INBERLIN_EMAIL", "") INBERLIN_PASSWORD = os.environ.get("INBERLIN_PASSWORD", "") CHECK_INTERVAL = int(os.environ.get("CHECK_INTERVAL", "300")) # seconds (5 minutes) +# WGcompany search configuration +WGCOMPANY_ENABLED = os.environ.get("WGCOMPANY_ENABLED", "true").lower() == "true" +WGCOMPANY_MIN_SIZE = os.environ.get("WGCOMPANY_MIN_SIZE", "") # min room size m² +WGCOMPANY_MAX_SIZE = os.environ.get("WGCOMPANY_MAX_SIZE", "") # max room size m² +WGCOMPANY_MIN_PRICE = os.environ.get("WGCOMPANY_MIN_PRICE", "") # min rent € +WGCOMPANY_MAX_PRICE = os.environ.get("WGCOMPANY_MAX_PRICE", "") # max rent € +WGCOMPANY_BEZIRK = os.environ.get("WGCOMPANY_BEZIRK", "0") # 0=egal, or specific district code +WGCOMPANY_AGE = os.environ.get("WGCOMPANY_AGE", "") # your age (for WG matching) +WGCOMPANY_SMOKER = 
os.environ.get("WGCOMPANY_SMOKER", "") # NR=Nichtraucher, R=Raucher, empty=egal + # Form data for applications -FORM_ANREDE = os.environ.get("FORM_ANREDE", "Herr") -FORM_VORNAME = os.environ.get("FORM_VORNAME", "Aron") -FORM_NACHNAME = os.environ.get("FORM_NACHNAME", "Petau") -FORM_EMAIL = os.environ.get("FORM_EMAIL", "aron@petau.net") -FORM_PHONE = os.environ.get("FORM_PHONE", "017695773688") +FORM_ANREDE = os.environ.get("FORM_ANREDE", "") +FORM_VORNAME = os.environ.get("FORM_VORNAME", "") +FORM_NACHNAME = os.environ.get("FORM_NACHNAME", "") +FORM_EMAIL = os.environ.get("FORM_EMAIL", "") +FORM_PHONE = os.environ.get("FORM_PHONE", "") +FORM_STRASSE = os.environ.get("FORM_STRASSE", "") +FORM_HAUSNUMMER = os.environ.get("FORM_HAUSNUMMER", "") +FORM_PLZ = os.environ.get("FORM_PLZ", "") +FORM_ORT = os.environ.get("FORM_ORT", "") FORM_PERSONS = os.environ.get("FORM_PERSONS", "1") FORM_CHILDREN = os.environ.get("FORM_CHILDREN", "0") -FORM_INCOME = os.environ.get("FORM_INCOME", "1600") +FORM_INCOME = os.environ.get("FORM_INCOME", "") DATA_DIR = Path("/data") LISTINGS_FILE = DATA_DIR / "listings.json" @@ -42,6 +56,10 @@ TIMING_FILE = DATA_DIR / "listing_times.csv" STATE_FILE = DATA_DIR / "state.json" APPLICATIONS_FILE = DATA_DIR / "applications.json" +# WGcompany specific files +WGCOMPANY_LISTINGS_FILE = DATA_DIR / "wgcompany_listings.json" +WGCOMPANY_TIMING_FILE = DATA_DIR / "wgcompany_times.csv" + # Setup logging logging.basicConfig( level=logging.INFO, @@ -108,8 +126,8 @@ class TelegramBot: self._handle_help_command() elif text == "/plot": self._handle_plot_command() - else: - logger.debug(f"Unknown command: {text}") + elif text.startswith("/"): + self._handle_unknown_command(text) def _handle_autopilot_command(self, text): logger.info(f"Processing autopilot command: {text}") @@ -156,6 +174,10 @@ class TelegramBot: When autopilot is ON, I will automatically apply to new listings.""" self._send_message(help_text) + def _handle_unknown_command(self, text): + cmd = 
text.split()[0] if text else text + self._send_message(f"❓ Unknown command: {cmd}\n\nUse /help to see available commands.") + def _handle_plot_command(self): """Generate and send a plot of listing times""" logger.info("Generating listing times plot...") @@ -375,12 +397,56 @@ class ApplicationHandler: await asyncio.sleep(1) except: pass - # Look for "Besichtigung vereinbaren" button - logger.info("[HOWOGE] Looking for 'Besichtigung vereinbaren' button...") - apply_btn = await page.query_selector('a:has-text("Besichtigung vereinbaren"), button:has-text("Besichtigung vereinbaren"), a:has-text("Anfragen"), button:has-text("Anfragen")') + # Try to handle consent manager (consentmanager.net) + try: + consent_selectors = [ + '#cmpbntyestxt', '.cmpboxbtnyes', 'a.cmpboxbtn.cmpboxbtnyes', + '#cmpwelcomebtnyes', '.cmptxt_btn_yes' + ] + for sel in consent_selectors: + consent_btn = await page.query_selector(sel) + if consent_btn and await consent_btn.is_visible(): + await consent_btn.click() + logger.info("[HOWOGE] Dismissed consent manager") + await asyncio.sleep(1) + break + except: pass - if apply_btn and await apply_btn.is_visible(): - logger.info("[HOWOGE] Found application button, clicking...") + # Look for "Besichtigung vereinbaren" button + # HOWOGE has multiple buttons with same text - only one is visible + logger.info("[HOWOGE] Looking for 'Besichtigung vereinbaren' button...") + + # Use href selector - more reliable than text matching + selectors = [ + 'a[href*="besichtigung-vereinbaren"]', + 'a:has-text("Besichtigung vereinbaren")', + 'button:has-text("Besichtigung vereinbaren")', + 'a:has-text("Anfragen")', + 'button:has-text("Anfragen")' + ] + + apply_btn = None + for sel in selectors: + all_btns = await page.query_selector_all(sel) + logger.info(f"[HOWOGE] Selector '{sel}' found {len(all_btns)} matches") + # Find first visible button + for btn in all_btns: + try: + if await btn.is_visible(): + apply_btn = btn + logger.info(f"[HOWOGE] Found visible button with 
selector '{sel}'") + break + except: + pass + if apply_btn: + break + + if apply_btn: + # Scroll the button into view and click + logger.info("[HOWOGE] Found application button, scrolling into view...") + await apply_btn.scroll_into_view_if_needed() + await asyncio.sleep(0.5) + logger.info("[HOWOGE] Clicking button...") await apply_btn.click() await asyncio.sleep(3) await page.wait_for_load_state("networkidle") @@ -520,6 +586,11 @@ class ApplicationHandler: return result async def _apply_degewo(self, listing: dict, result: dict) -> dict: + """ + Degewo uses Wohnungshelden (app.wohnungshelden.de) for their application system. + The application form is loaded in an iframe from a different domain. + We need to navigate directly to the iframe URL or interact with the iframe. + """ page = await self.context.new_page() try: logger.info(f"[DEGEWO] Opening page: {listing['link']}") @@ -527,6 +598,7 @@ class ApplicationHandler: logger.info("[DEGEWO] Page loaded") await asyncio.sleep(2) + # Dismiss cookie banner try: cookie_btn = await page.query_selector('button:has-text("Alle akzeptieren"), #CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll') if cookie_btn and await cookie_btn.is_visible(): @@ -542,155 +614,186 @@ class ApplicationHandler: await apply_btn.click() await asyncio.sleep(3) - # Fill out the contact form - logger.info("[DEGEWO] Filling out contact form...") + # Degewo uses Wohnungshelden iframe for the application form + # Find the iframe and get its URL to navigate directly + iframe_element = await page.query_selector('iframe[src*="wohnungshelden.de"]') + if iframe_element: + iframe_url = await iframe_element.get_attribute('src') + logger.info(f"[DEGEWO] Found Wohnungshelden iframe: {iframe_url}") - # Anrede - select from env - try: - anrede_select = await page.query_selector('select[name*="anrede"], select[name*="salutation"], select[id*="anrede"]') - if anrede_select: - await anrede_select.select_option(label=FORM_ANREDE) - logger.info(f"[DEGEWO] Selected 
Anrede: {FORM_ANREDE}") - else: - # Try radio button - anrede_radio = await page.query_selector(f'input[type="radio"][value="{FORM_ANREDE}"], label:has-text("{FORM_ANREDE}") input[type="radio"]') - if anrede_radio: - await anrede_radio.click() - logger.info(f"[DEGEWO] Clicked Anrede radio: {FORM_ANREDE}") - except Exception as e: - logger.warning(f"[DEGEWO] Could not set Anrede: {e}") + # Navigate to the iframe URL directly in a new page for full access + iframe_page = await self.context.new_page() + try: + await iframe_page.goto(iframe_url, wait_until="networkidle") + await asyncio.sleep(2) + logger.info("[DEGEWO] Loaded Wohnungshelden application page") - # Vorname - try: - vorname_field = await page.query_selector('input[name*="vorname"], input[name*="firstname"], input[id*="vorname"], input[placeholder*="Vorname"]') - if vorname_field: - await vorname_field.fill(FORM_VORNAME) - logger.info(f"[DEGEWO] Filled Vorname: {FORM_VORNAME}") - except Exception as e: - logger.warning(f"[DEGEWO] Could not fill Vorname: {e}") + # Take screenshot of the Wohnungshelden form + screenshot_path = DATA_DIR / f"degewo_wohnungshelden_{listing['id']}.png" + await iframe_page.screenshot(path=str(screenshot_path), full_page=True) + logger.info(f"[DEGEWO] Saved Wohnungshelden screenshot to {screenshot_path}") - # Nachname - try: - nachname_field = await page.query_selector('input[name*="nachname"], input[name*="lastname"], input[id*="nachname"], input[placeholder*="Nachname"]') - if nachname_field: - await nachname_field.fill(FORM_NACHNAME) - logger.info(f"[DEGEWO] Filled Nachname: {FORM_NACHNAME}") - except Exception as e: - logger.warning(f"[DEGEWO] Could not fill Nachname: {e}") + # Save HTML for debugging + html_content = await iframe_page.content() + html_path = DATA_DIR / f"degewo_wohnungshelden_{listing['id']}.html" + with open(html_path, 'w', encoding='utf-8') as f: + f.write(html_content) + logger.info(f"[DEGEWO] Saved HTML to {html_path}") - # E-Mail - try: - email_field = 
await page.query_selector('input[type="email"], input[name*="email"], input[name*="mail"], input[id*="email"]') - if email_field: - await email_field.fill(FORM_EMAIL) - logger.info(f"[DEGEWO] Filled E-Mail: {FORM_EMAIL}") - except Exception as e: - logger.warning(f"[DEGEWO] Could not fill E-Mail: {e}") + # Fill out Wohnungshelden form + # The form uses specific IDs: #firstName, #lastName, #email, etc. + form_filled = False - # Telefonnummer - try: - tel_field = await page.query_selector('input[type="tel"], input[name*="telefon"], input[name*="phone"], input[id*="telefon"]') - if tel_field: - await tel_field.fill(FORM_PHONE) - logger.info(f"[DEGEWO] Filled Telefonnummer: {FORM_PHONE}") - except Exception as e: - logger.warning(f"[DEGEWO] Could not handle Telefon: {e}") + # Anrede (Salutation) - ng-select dropdown + try: + # Click on the salutation dropdown to open it + salutation_dropdown = await iframe_page.query_selector('#salutation-dropdown, ng-select[id*="salutation"]') + if salutation_dropdown: + await salutation_dropdown.click() + await asyncio.sleep(0.5) + # Select "Herr" or "Frau" based on FORM_ANREDE + anrede_option = await iframe_page.query_selector(f'.ng-option:has-text("{FORM_ANREDE}")') + if anrede_option: + await anrede_option.click() + logger.info(f"[DEGEWO] Selected Anrede: {FORM_ANREDE}") + form_filled = True + except Exception as e: + logger.warning(f"[DEGEWO] Could not set Anrede: {e}") - # Anzahl einziehende Personen - try: - personen_field = await page.query_selector('input[name*="personen"], input[name*="persons"], input[id*="personen"], select[name*="personen"]') - if personen_field: - tag_name = await personen_field.evaluate("el => el.tagName.toLowerCase()") - if tag_name == "select": - await personen_field.select_option(FORM_PERSONS) - else: - await personen_field.fill(FORM_PERSONS) - logger.info(f"[DEGEWO] Set Anzahl Personen: {FORM_PERSONS}") - except Exception as e: - logger.warning(f"[DEGEWO] Could not set Personen: {e}") + # Vorname 
(First name) + try: + vorname_field = await iframe_page.query_selector('#firstName') + if vorname_field: + await vorname_field.fill(FORM_VORNAME) + logger.info(f"[DEGEWO] Filled Vorname: {FORM_VORNAME}") + form_filled = True + except Exception as e: + logger.warning(f"[DEGEWO] Could not fill Vorname: {e}") - # davon Anzahl Kinder - try: - kinder_field = await page.query_selector('input[name*="kinder"], input[name*="children"], input[id*="kinder"], select[name*="kinder"]') - if kinder_field: - tag_name = await kinder_field.evaluate("el => el.tagName.toLowerCase()") - if tag_name == "select": - await kinder_field.select_option(FORM_CHILDREN) - else: - await kinder_field.fill(FORM_CHILDREN) - logger.info(f"[DEGEWO] Set Anzahl Kinder: {FORM_CHILDREN}") - except Exception as e: - logger.warning(f"[DEGEWO] Could not set Kinder: {e}") + # Nachname (Last name) + try: + nachname_field = await iframe_page.query_selector('#lastName') + if nachname_field: + await nachname_field.fill(FORM_NACHNAME) + logger.info(f"[DEGEWO] Filled Nachname: {FORM_NACHNAME}") + form_filled = True + except Exception as e: + logger.warning(f"[DEGEWO] Could not fill Nachname: {e}") - # Monatliches Haushaltsnettoeinkommen - try: - einkommen_field = await page.query_selector('input[name*="einkommen"], input[name*="income"], input[id*="einkommen"], select[name*="einkommen"]') - if einkommen_field: - tag_name = await einkommen_field.evaluate("el => el.tagName.toLowerCase()") - if tag_name == "select": - # Try to select by value or index - try: - await einkommen_field.select_option(FORM_INCOME) - except: - # Fallback to first non-empty option - options = await einkommen_field.query_selector_all("option") - if len(options) > 1: - await einkommen_field.select_option(index=1) - else: - await einkommen_field.fill(FORM_INCOME) - logger.info(f"[DEGEWO] Set Einkommen: {FORM_INCOME}") - except Exception as e: - logger.warning(f"[DEGEWO] Could not set Einkommen: {e}") + # E-Mail + try: + email_field = await 
iframe_page.query_selector('#email') + if email_field: + await email_field.fill(FORM_EMAIL) + logger.info(f"[DEGEWO] Filled E-Mail: {FORM_EMAIL}") + form_filled = True + except Exception as e: + logger.warning(f"[DEGEWO] Could not fill E-Mail: {e}") - # "Für mich selbst" selection - try: - selbst_radio = await page.query_selector('input[type="radio"][value*="selbst"], input[type="radio"][value*="myself"], label:has-text("Für mich selbst") input') - if selbst_radio: - await selbst_radio.click() - logger.info("[DEGEWO] Selected: Für mich selbst") - except Exception as e: - logger.warning(f"[DEGEWO] Could not set 'Für mich selbst': {e}") + # Telefonnummer + try: + tel_field = await iframe_page.query_selector('input[id*="telefonnummer"]') + if tel_field: + await tel_field.fill(FORM_PHONE) + logger.info(f"[DEGEWO] Filled Telefon: {FORM_PHONE}") + form_filled = True + except Exception as e: + logger.warning(f"[DEGEWO] Could not fill Telefon: {e}") - # Accept data privacy checkbox - try: - checkbox = await page.query_selector('input[type="checkbox"][name*="datenschutz"], input[type="checkbox"][name*="privacy"], input[type="checkbox"][name*="consent"]') - if checkbox and not await checkbox.is_checked(): - await checkbox.click() - logger.info("[DEGEWO] Checked privacy/consent checkbox") - except Exception as e: - logger.warning(f"[DEGEWO] Could not check consent: {e}") + # Anzahl einziehende Personen + try: + personen_field = await iframe_page.query_selector('input[id*="numberPersonsTotal"]') + if personen_field: + await personen_field.fill(FORM_PERSONS) + logger.info(f"[DEGEWO] Filled Anzahl Personen: {FORM_PERSONS}") + form_filled = True + except Exception as e: + logger.warning(f"[DEGEWO] Could not fill Anzahl Personen: {e}") - await asyncio.sleep(1) + # "Für sich selbst" dropdown + try: + selbst_dropdown = await iframe_page.query_selector('ng-select[id*="fuer_wen"]') + if selbst_dropdown: + await selbst_dropdown.click() + await asyncio.sleep(0.5) + # Select "Für mich 
selbst" + selbst_option = await iframe_page.query_selector('.ng-option:has-text("Für mich selbst"), .ng-option:has-text("selbst")') + if selbst_option: + await selbst_option.click() + logger.info("[DEGEWO] Selected: Für mich selbst") + form_filled = True + except Exception as e: + logger.warning(f"[DEGEWO] Could not set 'Für sich selbst': {e}") - # Take screenshot before submitting - screenshot_path = DATA_DIR / f"degewo_form_{listing['id']}.png" - await page.screenshot(path=str(screenshot_path), full_page=True) - logger.info(f"[DEGEWO] Saved form screenshot to {screenshot_path}") + await asyncio.sleep(1) - # Submit the form - try: - submit_btn = await page.query_selector('button[type="submit"], input[type="submit"], button:has-text("Absenden"), button:has-text("Senden")') - if submit_btn and await submit_btn.is_visible(): - await submit_btn.click() - logger.info("[DEGEWO] Clicked submit button") - await asyncio.sleep(3) + # Take screenshot after filling form + screenshot_path = DATA_DIR / f"degewo_form_filled_{listing['id']}.png" + await iframe_page.screenshot(path=str(screenshot_path), full_page=True) + logger.info(f"[DEGEWO] Saved filled form screenshot to {screenshot_path}") - # Take screenshot after submission - screenshot_path = DATA_DIR / f"degewo_submitted_{listing['id']}.png" - await page.screenshot(path=str(screenshot_path), full_page=True) - logger.info(f"[DEGEWO] Saved submission screenshot to {screenshot_path}") + # Try to submit + try: + # Look for submit button with various patterns + submit_selectors = [ + 'button[type="submit"]', + 'input[type="submit"]', + 'button:has-text("Absenden")', + 'button:has-text("Senden")', + 'button:has-text("Anfrage")', + 'button:has-text("Bewerben")', + 'button:has-text("Submit")', + '.btn-primary', + '.submit-btn', + ] - result["success"] = True - result["message"] = "Application submitted" - else: - result["success"] = True - result["message"] = "Form filled, submit button not found" - logger.warning("[DEGEWO] 
Submit button not found") - except Exception as e: - result["success"] = True - result["message"] = f"Form filled, submit error: {str(e)}" - logger.warning(f"[DEGEWO] Submit error: {e}") + submit_btn = None + for selector in submit_selectors: + submit_btn = await iframe_page.query_selector(selector) + if submit_btn and await submit_btn.is_visible(): + logger.info(f"[DEGEWO] Found submit button with selector: {selector}") + break + submit_btn = None + if submit_btn: + await submit_btn.click() + logger.info("[DEGEWO] Clicked submit button") + await asyncio.sleep(3) + + # Take screenshot after submission + screenshot_path = DATA_DIR / f"degewo_submitted_{listing['id']}.png" + await iframe_page.screenshot(path=str(screenshot_path), full_page=True) + logger.info(f"[DEGEWO] Saved submission screenshot to {screenshot_path}") + + result["success"] = True + result["message"] = "Application submitted via Wohnungshelden" + else: + # Submit button not found - this is a failure + result["success"] = False + result["message"] = "Wohnungshelden form loaded but submit button not found" + logger.warning("[DEGEWO] Submit button not found in Wohnungshelden form") + except Exception as e: + result["success"] = False + result["message"] = f"Wohnungshelden submit error: {str(e)}" + logger.warning(f"[DEGEWO] Submit error: {e}") + finally: + await iframe_page.close() + else: + # No iframe found - try the old approach (fallback for different page structure) + logger.warning("[DEGEWO] Wohnungshelden iframe not found, trying direct form...") + + # Take screenshot for debugging + screenshot_path = DATA_DIR / f"degewo_noiframe_{listing['id']}.png" + await page.screenshot(path=str(screenshot_path), full_page=True) + + # Save HTML for debugging + html_content = await page.content() + html_path = DATA_DIR / "degewo_debug.html" + with open(html_path, 'w', encoding='utf-8') as f: + f.write(html_content) + + result["success"] = False + result["message"] = "Wohnungshelden iframe not found on page" 
else: result["message"] = "No kontaktieren button found" logger.warning("[DEGEWO] Could not find kontaktieren button") @@ -762,22 +865,138 @@ class ApplicationHandler: await asyncio.sleep(1) except: pass - logger.info("[STADTUNDLAND] Looking for application button...") - apply_btn = await page.query_selector('a:has-text("Anfragen"), button:has-text("Bewerben"), a:has-text("Interesse")') - if apply_btn and await apply_btn.is_visible(): - logger.info("[STADTUNDLAND] Found application button, clicking...") - await apply_btn.click() - await asyncio.sleep(2) + # Stadt und Land has the contact form directly on the page + logger.info("[STADTUNDLAND] Looking for contact form fields...") - screenshot_path = DATA_DIR / f"stadtundland_{listing['id']}.png" - await page.screenshot(path=str(screenshot_path)) - logger.info(f"[STADTUNDLAND] Saved screenshot to {screenshot_path}") + form_filled = False - result["success"] = True - result["message"] = "Application page opened" + # Fill Vorname + try: + vorname_field = await page.query_selector('input[name*="vorname" i], input[placeholder*="Vorname" i], input#vorname') + if vorname_field: + await vorname_field.fill(FORM_VORNAME) + logger.info(f"[STADTUNDLAND] Filled Vorname: {FORM_VORNAME}") + form_filled = True + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not fill Vorname: {e}") + + # Fill Nachname + try: + nachname_field = await page.query_selector('input[name*="nachname" i], input[placeholder*="Nachname" i], input#nachname') + if nachname_field: + await nachname_field.fill(FORM_NACHNAME) + logger.info(f"[STADTUNDLAND] Filled Nachname: {FORM_NACHNAME}") + form_filled = True + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not fill Nachname: {e}") + + # Fill Telefonnummer + try: + tel_field = await page.query_selector('input[name*="telefon" i], input[type="tel"], input[placeholder*="Telefon" i]') + if tel_field: + await tel_field.fill(FORM_PHONE) + logger.info(f"[STADTUNDLAND] Filled Telefon: 
{FORM_PHONE}") + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not fill Telefon: {e}") + + # Fill E-Mail + try: + email_field = await page.query_selector('input[type="email"], input[name*="email" i], input[name*="mail" i]') + if email_field: + await email_field.fill(FORM_EMAIL) + logger.info(f"[STADTUNDLAND] Filled E-Mail: {FORM_EMAIL}") + form_filled = True + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not fill E-Mail: {e}") + + # Fill Straße (street) + try: + strasse_field = await page.query_selector('input[name*="strasse" i], input[name*="straße" i], input[placeholder*="Straße" i], input#strasse') + if strasse_field and FORM_STRASSE: + await strasse_field.fill(FORM_STRASSE) + logger.info(f"[STADTUNDLAND] Filled Straße: {FORM_STRASSE}") + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not fill Straße: {e}") + + # Fill Hausnummer + try: + hausnummer_field = await page.query_selector('input[name*="hausnummer" i], input[name*="hausnr" i], input[placeholder*="Hausnummer" i], input#hausnummer') + if hausnummer_field and FORM_HAUSNUMMER: + await hausnummer_field.fill(FORM_HAUSNUMMER) + logger.info(f"[STADTUNDLAND] Filled Hausnummer: {FORM_HAUSNUMMER}") + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not fill Hausnummer: {e}") + + # Fill PLZ + try: + plz_field = await page.query_selector('input[name*="plz" i], input[placeholder*="PLZ" i], input#plz') + if plz_field and FORM_PLZ: + await plz_field.fill(FORM_PLZ) + logger.info(f"[STADTUNDLAND] Filled PLZ: {FORM_PLZ}") + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not fill PLZ: {e}") + + # Fill Ort (city) + try: + ort_field = await page.query_selector('input[name*="ort" i], input[placeholder*="Ort" i], input#ort') + if ort_field and FORM_ORT: + await ort_field.fill(FORM_ORT) + logger.info(f"[STADTUNDLAND] Filled Ort: {FORM_ORT}") + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not fill Ort: {e}") + + # Check Datenschutz 
checkbox + try: + datenschutz_checkbox = await page.query_selector('input[type="checkbox"][name*="datenschutz" i], input[type="checkbox"][name*="privacy" i]') + if datenschutz_checkbox and not await datenschutz_checkbox.is_checked(): + await datenschutz_checkbox.click() + logger.info("[STADTUNDLAND] Checked Datenschutz checkbox") + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not check Datenschutz: {e}") + + # Check Provision checkbox + try: + provision_checkbox = await page.query_selector('input[type="checkbox"][name*="provision" i]') + if provision_checkbox and not await provision_checkbox.is_checked(): + await provision_checkbox.click() + logger.info("[STADTUNDLAND] Checked Provision checkbox") + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not check Provision: {e}") + + await asyncio.sleep(1) + + # Screenshot before submitting + screenshot_path = DATA_DIR / f"stadtundland_form_{listing['id']}.png" + await page.screenshot(path=str(screenshot_path), full_page=True) + logger.info(f"[STADTUNDLAND] Saved form screenshot to {screenshot_path}") + + if form_filled: + # Submit the form - look for submit button + try: + submit_btn = await page.query_selector('button[type="submit"], input[type="submit"], button:has-text("prüfen"), button:has-text("Absenden"), button:has-text("Senden")') + if submit_btn and await submit_btn.is_visible(): + await submit_btn.click() + logger.info("[STADTUNDLAND] Clicked submit button") + await asyncio.sleep(3) + + # Screenshot after submission + screenshot_path = DATA_DIR / f"stadtundland_submitted_{listing['id']}.png" + await page.screenshot(path=str(screenshot_path), full_page=True) + logger.info(f"[STADTUNDLAND] Saved submission screenshot to {screenshot_path}") + + result["success"] = True + result["message"] = "Application submitted" + else: + result["success"] = True + result["message"] = "Form filled, submit button not found" + logger.warning("[STADTUNDLAND] Submit button not found") + except 
Exception as e: + result["success"] = True + result["message"] = f"Form filled, submit error: {str(e)}" + logger.warning(f"[STADTUNDLAND] Submit error: {e}") else: - result["message"] = "No application button found" - logger.warning("[STADTUNDLAND] Could not find application button") + result["message"] = "No form fields found" + logger.warning("[STADTUNDLAND] Could not find form fields") screenshot_path = DATA_DIR / f"stadtundland_nobtn_{listing['id']}.png" await page.screenshot(path=str(screenshot_path)) except Exception as e: @@ -1284,26 +1503,331 @@ class InBerlinMonitor: return await self.apply_to_listings(listings) +class WGCompanyMonitor: + """Monitor WGcompany.de for new WG room listings""" + + def __init__(self): + self.browser = None + self.context = None + + async def init_browser(self): + """Initialize Playwright browser""" + if self.browser is None: + self.playwright = await async_playwright().start() + self.browser = await self.playwright.chromium.launch(headless=True) + self.context = await self.browser.new_context( + user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" + ) + logger.info("[WGCOMPANY] Browser initialized") + + async def fetch_listings(self) -> list[dict]: + """Fetch WG listings from wgcompany.de search""" + listings = [] + + try: + page = await self.context.new_page() + + # Use simple search page: st=1 (Berlin), mi=10 (simple WG search), li=100 + search_url = "http://www.wgcompany.de/cgi-bin/seite?st=1&mi=10&li=100" + logger.info(f"[WGCOMPANY] Loading search page: {search_url}") + await page.goto(search_url, wait_until="networkidle") + await asyncio.sleep(2) + + # Fill search form - field names from simple search: + # c = Min. Größe (min size m²) + # a = Max. 
Miete (max rent €) + # l = Alter (age) + # e = Bezirk (district select) + + # Min size field + if WGCOMPANY_MIN_SIZE: + min_size_field = await page.query_selector('input[name="c"]') + if min_size_field: + await min_size_field.fill(WGCOMPANY_MIN_SIZE) + logger.info(f"[WGCOMPANY] Set min size: {WGCOMPANY_MIN_SIZE} m²") + + # Max rent field + if WGCOMPANY_MAX_PRICE: + max_price_field = await page.query_selector('input[name="a"]') + if max_price_field: + await max_price_field.fill(WGCOMPANY_MAX_PRICE) + logger.info(f"[WGCOMPANY] Set max rent: {WGCOMPANY_MAX_PRICE} €") + + # Age field (l = Alter) + if WGCOMPANY_AGE: + age_field = await page.query_selector('input[name="l"]') + if age_field: + await age_field.fill(WGCOMPANY_AGE) + logger.info(f"[WGCOMPANY] Set age: {WGCOMPANY_AGE}") + + # Smoker filter (o = RaucherIn: NR=Nichtraucher, R=Raucher) + if WGCOMPANY_SMOKER: + smoker_select = await page.query_selector('select[name="o"]') + if smoker_select: + await smoker_select.select_option(WGCOMPANY_SMOKER) + logger.info(f"[WGCOMPANY] Set smoker: {WGCOMPANY_SMOKER}") + + # District selection (e = Bezirk, multi-select) + # Leave as default "egal" (all districts) unless specified + if WGCOMPANY_BEZIRK and WGCOMPANY_BEZIRK != "0": + bezirk_select = await page.query_selector('select[name="e"]') + if bezirk_select: + await bezirk_select.select_option(WGCOMPANY_BEZIRK) + logger.info(f"[WGCOMPANY] Set district: {WGCOMPANY_BEZIRK}") + + # Submit the search form + submit_btn = await page.query_selector('input[type="submit"][value*="finde"], input[type="submit"]') + if submit_btn: + logger.info("[WGCOMPANY] Submitting search form...") + await submit_btn.click() + await page.wait_for_load_state("networkidle") + await asyncio.sleep(2) + + # Get results page content + content = await page.content() + + # Save debug HTML + debug_path = DATA_DIR / "wgcompany_debug.html" + with open(debug_path, "w", encoding="utf-8") as f: + f.write(content) + logger.info(f"[WGCOMPANY] Saved debug HTML to 
{debug_path}") + + # Parse listings from the results page + # WGcompany results typically have tables with room info + # Look for listing links and extract data + + # Pattern to find listing detail links + # Format: wg.pl?...function=wgzeigen... with room details in table rows + listing_links = await page.query_selector_all('a[href*="wg.pl"][href*="wgzeigen"]') + logger.info(f"[WGCOMPANY] Found {len(listing_links)} listing links") + + for link_elem in listing_links: + try: + href = await link_elem.get_attribute("href") + if not href: + continue + + # Get surrounding text/row for listing details + parent = await link_elem.evaluate_handle("el => el.closest('tr') || el.parentElement") + row_text = await parent.evaluate("el => el.innerText") if parent else "" + + # Extract price from row text (e.g., "350 €" or "350€") + price_match = re.search(r'(\d+)\s*€', row_text) + price = price_match.group(1) + " €" if price_match else "?" + + # Extract size (e.g., "15 m²" or "15m²") + size_match = re.search(r'(\d+)\s*m²', row_text) + size = size_match.group(1) + " m²" if size_match else "?" 
+ + # Extract district/location + # Common Berlin districts in text + bezirk_patterns = [ + "Kreuzberg", "Neukölln", "Friedrichshain", "Prenzlauer Berg", + "Mitte", "Wedding", "Charlottenburg", "Schöneberg", "Tempelhof", + "Steglitz", "Wilmersdorf", "Pankow", "Lichtenberg", "Treptow", + "Köpenick", "Reinickendorf", "Spandau", "Zehlendorf", "Moabit" + ] + location = "Berlin" + for bez in bezirk_patterns: + if bez.lower() in row_text.lower(): + location = bez + break + + # Make absolute URL + if not href.startswith("http"): + href = f"http://www.wgcompany.de{href}" if href.startswith("/") else f"http://www.wgcompany.de/cgi-bin/{href}" + + # Generate unique ID from link and key details + listing_id = hashlib.md5(f"{href}{price}{size}".encode()).hexdigest()[:12] + + listings.append({ + "id": listing_id, + "rooms": "1 Zimmer (WG)", + "size": size, + "price": price, + "address": location, + "link": href, + "source": "wgcompany", + "fetched_at": datetime.now().isoformat() + }) + except Exception as e: + logger.debug(f"[WGCOMPANY] Error parsing listing: {e}") + continue + + # Deduplicate by id + seen_ids = set() + unique_listings = [] + for listing in listings: + if listing["id"] not in seen_ids: + seen_ids.add(listing["id"]) + unique_listings.append(listing) + listings = unique_listings + + await page.close() + logger.info(f"[WGCOMPANY] Fetched {len(listings)} unique listings") + return listings + + except Exception as e: + logger.error(f"[WGCOMPANY] Error fetching listings: {e}") + import traceback + logger.error(traceback.format_exc()) + return [] + + def load_previous_listings(self) -> dict: + """Load previously saved WGcompany listings""" + if WGCOMPANY_LISTINGS_FILE.exists(): + with open(WGCOMPANY_LISTINGS_FILE, "r") as f: + return json.load(f) + return {} + + def save_listings(self, listings: list[dict]): + """Save current WGcompany listings""" + listings_dict = {l["id"]: l for l in listings} + with open(WGCOMPANY_LISTINGS_FILE, "w") as f: + json.dump(listings_dict, 
f, indent=2, ensure_ascii=False) + + def find_new_listings(self, current: list[dict], previous: dict) -> list[dict]: + """Find listings that are new since last check""" + new = [] + for listing in current: + if listing["id"] not in previous: + new.append(listing) + return new + + def send_telegram(self, message: str): + """Send notification via Telegram""" + if not TELEGRAM_BOT_TOKEN or not TELEGRAM_CHAT_ID: + logger.warning("[WGCOMPANY] Telegram not configured, skipping notification") + return + + try: + url = f"https://api.telegram.org/bot{TELEGRAM_BOT_TOKEN}/sendMessage" + data = { + "chat_id": TELEGRAM_CHAT_ID, + "text": message, + "parse_mode": "HTML", + "disable_web_page_preview": True + } + response = requests.post(url, data=data) + if response.ok: + logger.info("[WGCOMPANY] Telegram notification sent") + else: + logger.error(f"[WGCOMPANY] Telegram error: {response.text}") + except Exception as e: + logger.error(f"[WGCOMPANY] Telegram error: {e}") + + def log_listing_times(self, new_listings: list[dict]): + """Log new WGcompany listing appearance times to CSV""" + if not new_listings: + return + + file_exists = WGCOMPANY_TIMING_FILE.exists() + + with open(WGCOMPANY_TIMING_FILE, "a", newline="", encoding="utf-8") as f: + writer = csv.writer(f) + if not file_exists: + writer.writerow(["timestamp", "weekday", "hour", "minute", "rooms", "size", "price", "address", "listing_id"]) + + now = datetime.now() + for listing in new_listings: + writer.writerow([ + now.isoformat(), + now.strftime("%A"), + now.hour, + now.minute, + listing["rooms"], + listing["size"], + listing["price"], + listing["address"], + listing["id"] + ]) + + logger.info(f"[WGCOMPANY] Logged {len(new_listings)} listing times to CSV") + + def notify_new_listings(self, new_listings: list[dict]): + """Send individual notification for each new WGcompany listing""" + if not new_listings: + return + + for listing in new_listings: + message = f"🏠 Neues WG-Zimmer! 
(WGcompany)\n\n" + message += f"🚪 {listing['rooms']}\n" + message += f"📐 {listing['size']}\n" + message += f"💰 {listing['price']}\n" + message += f"📍 {listing['address']}\n\n" + message += f"👉 Zum Angebot" + + self.send_telegram(message) + time.sleep(0.5) + + def check(self): + """Run a single check for new WGcompany listings""" + logger.info("[WGCOMPANY] Starting check...") + + # Fetch current listings + current_listings = asyncio.get_event_loop().run_until_complete(self._async_fetch()) + if not current_listings: + logger.warning("[WGCOMPANY] No listings fetched") + return + + # Load previous listings + previous_listings = self.load_previous_listings() + + # First run - just save baseline + if not previous_listings: + logger.info(f"[WGCOMPANY] First run - saving {len(current_listings)} listings as baseline") + self.save_listings(current_listings) + return + + # Find new listings + new_listings = self.find_new_listings(current_listings, previous_listings) + + if new_listings: + logger.info(f"[WGCOMPANY] Found {len(new_listings)} new listing(s)") + self.log_listing_times(new_listings) + self.notify_new_listings(new_listings) + else: + logger.info("[WGCOMPANY] No new listings") + + # Save current state + self.save_listings(current_listings) + + async def _async_fetch(self): + await self.init_browser() + return await self.fetch_listings() + + def main(): """Main entry point""" # Ensure data directory exists DATA_DIR.mkdir(parents=True, exist_ok=True) - monitor = InBerlinMonitor() + # Initialize monitors + inberlin_monitor = InBerlinMonitor() + wgcompany_monitor = WGCompanyMonitor() if WGCOMPANY_ENABLED else None # Start Telegram command listener - telegram_bot = TelegramBot(monitor) + telegram_bot = TelegramBot(inberlin_monitor) telegram_bot.start() - logger.info(f"inberlin-monitor started (interval: {CHECK_INTERVAL}s)") - logger.info(f"Autopilot: {'ENABLED' if monitor.is_autopilot_enabled() else 'DISABLED'}") + logger.info(f"Monitor started (interval: 
{CHECK_INTERVAL}s)") + logger.info(f"InBerlin Autopilot: {'ENABLED' if inberlin_monitor.is_autopilot_enabled() else 'DISABLED'}") + logger.info(f"WGcompany: {'ENABLED' if WGCOMPANY_ENABLED else 'DISABLED'}") while True: + # Check InBerlinWohnen try: - monitor.check() + inberlin_monitor.check() except Exception as e: - logger.error(f"Check failed: {e}") + logger.error(f"InBerlin check failed: {e}") + + # Check WGcompany + if wgcompany_monitor: + try: + wgcompany_monitor.check() + except Exception as e: + logger.error(f"WGcompany check failed: {e}") time.sleep(CHECK_INTERVAL) diff --git a/requirements.txt b/requirements.txt index 6303c2e..2a0a493 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ requests>=2.31.0 -playwright>=1.49.0 +playwright>=1.57.0 matplotlib>=3.8.0 pandas>=2.0.0