From ce66fc1933bcca40acf3b0421ab202068d4c86ee Mon Sep 17 00:00:00 2001 From: Aron Date: Tue, 9 Dec 2025 11:30:17 +0100 Subject: [PATCH] Fix Degewo auto-apply (Wohnungshelden iframe), update dependencies, cleanup for production - Fix Degewo handler to work with Wohnungshelden iframe portal - Update playwright to >=1.57.0 - Add proper form field selectors for Wohnungshelden - Fix success status bug (was marking failed submissions as success) - Clean up .env.example (remove real credentials) - Update README with housing company support table - Add BOTFATHER_COMMANDS.txt for easy bot setup - Add copilot-instructions.md for development context --- .env.example | 32 +- .github/copilot-instructions.md | 81 + .gitignore | 7 + .python-version | 2 +- BOTFATHER_COMMANDS.txt | 6 + Dockerfile | 2 +- README.md | 48 +- bot_logo.png | Bin 0 -> 9699 bytes debug_page.html | 3399 ------------------------------- docker-compose.yml | 20 +- monitor.py | 854 ++++++-- requirements.txt | 2 +- 12 files changed, 854 insertions(+), 3599 deletions(-) create mode 100644 .github/copilot-instructions.md create mode 100644 BOTFATHER_COMMANDS.txt create mode 100644 bot_logo.png delete mode 100644 debug_page.html diff --git a/.env.example b/.env.example index 406a77c..b404631 100644 --- a/.env.example +++ b/.env.example @@ -3,9 +3,31 @@ TELEGRAM_BOT_TOKEN=your_bot_token_here TELEGRAM_CHAT_ID=your_chat_id_here -# inberlinwohnen.de Login -INBERLIN_EMAIL=aron@petau.net -INBERLIN_PASSWORD=BvA5n0iKmGV1 +# inberlinwohnen.de Login (optional - for personalized filtered results) +INBERLIN_EMAIL=your_email@example.com +INBERLIN_PASSWORD=your_password_here -# Check interval in seconds (default: 600 = 10 minutes) -CHECK_INTERVAL=600 +# Check interval in seconds (default: 300 = 5 minutes) +CHECK_INTERVAL=300 + +# Form Data for Autopilot Applications +FORM_ANREDE=Herr +FORM_VORNAME=Max +FORM_NACHNAME=Mustermann +FORM_EMAIL=max@example.com +FORM_PHONE=030123456789 +FORM_STRASSE=Musterstraße +FORM_HAUSNUMMER=1 +FORM_PLZ=10115 +FORM_ORT=Berlin +FORM_PERSONS=2 +FORM_CHILDREN=0 +FORM_INCOME=2500 + +# WGcompany.de Search Filters (optional) +WGCOMPANY_ENABLED=true +WGCOMPANY_MIN_SIZE= +WGCOMPANY_MAX_SIZE= +WGCOMPANY_MIN_PRICE= +WGCOMPANY_MAX_PRICE= +WGCOMPANY_BEZIRK=0 diff --git a/.github/copilot-instructions.md b/.github/copilot-instructions.md new file mode 100644 index 0000000..b6c4dd2 --- /dev/null +++ b/.github/copilot-instructions.md @@ -0,0 +1,81 @@ +# Copilot Instructions for inberlin-monitor + +## Project Overview + +A Python-based apartment monitoring bot for Berlin's public housing portal (inberlinwohnen.de) and WG rooms (wgcompany.de). Monitors listings from 6 housing companies (HOWOGE, Gewobag, Degewo, Gesobau, Stadt und Land, WBM) plus WGcompany, and sends Telegram notifications with optional auto-application via Playwright browser automation. + +## Architecture + +**Single-file monolith** (`monitor.py`, ~1600 lines) with five main classes: +- `InBerlinMonitor` - Core scraping/monitoring loop for inberlinwohnen.de, login handling, listing detection +- `WGCompanyMonitor` - Monitors wgcompany.de WG rooms with configurable search filters +- `ApplicationHandler` - Company-specific form automation (each `_apply_*` method handles one housing company) +- `TelegramBot` - Command handling via long-polling in a daemon thread +- Main loop runs synchronous with `asyncio.get_event_loop().run_until_complete()` for Playwright calls + +**Data flow**: Fetch listings → Compare with `listings.json` / `wgcompany_listings.json` → Detect new → Log to CSV → Auto-apply if autopilot enabled (inberlin only) → Save to `applications.json` → Send Telegram notification + +## Key Patterns + +### Company-specific handlers +Each housing company has a dedicated `_apply_{company}()` method in `ApplicationHandler`. When adding support for a new company: +1. Add detection in `_detect_company()` (line ~350) +2. Add handler call in `apply()` switch (line ~330) +3. Implement `_apply_newcompany()` following existing patterns (cookie dismiss → find button → fill form → submit → screenshot) + +### Listing identification +Listings are hashed by `md5(key_fields)[:12]` to generate stable IDs: +- InBerlin: `md5(rooms+size+price+address)` +- WGcompany: `md5(link+price+size)` + +### State management +- `state.json` - Runtime state (autopilot toggle) +- `listings.json` - Previously seen inberlinwohnen listings +- `wgcompany_listings.json` - Previously seen WGcompany listings +- `applications.json` - Application history with success/failure status +- `listing_times.csv` / `wgcompany_times.csv` - Time-series data for pattern analysis + +## Development + +### Run locally +```bash +# Install deps (requires Playwright) +pip install -r requirements.txt +playwright install chromium + +# Set env vars and run +export TELEGRAM_BOT_TOKEN=... TELEGRAM_CHAT_ID=... +python monitor.py +``` + +### Docker (production) +```bash +cp .env.example .env # Configure credentials +docker compose up -d +docker compose logs -f +``` + +### Debugging +- Screenshots saved to `data/` on application failures (`*_nobtn_*.png`) +- HTML saved to `data/debug_page.html` (inberlin) and `data/wgcompany_debug.html` +- Full logs in `data/monitor.log` + +## Environment Variables + +Required: `TELEGRAM_BOT_TOKEN`, `TELEGRAM_CHAT_ID` +InBerlin login: `INBERLIN_EMAIL`, `INBERLIN_PASSWORD` +Form data: `FORM_ANREDE`, `FORM_VORNAME`, `FORM_NACHNAME`, `FORM_EMAIL`, `FORM_PHONE`, `FORM_STRASSE`, `FORM_HAUSNUMMER`, `FORM_PLZ`, `FORM_ORT`, `FORM_PERSONS`, `FORM_CHILDREN`, `FORM_INCOME` +WGcompany: `WGCOMPANY_ENABLED`, `WGCOMPANY_MIN_SIZE`, `WGCOMPANY_MAX_SIZE`, `WGCOMPANY_MIN_PRICE`, `WGCOMPANY_MAX_PRICE`, `WGCOMPANY_BEZIRK` + +## Common Tasks + +### Fix a broken company handler +Check `data/*_nobtn_*.png` screenshots and `data/debug_page.html` to see actual page structure. Update selectors in the corresponding `_apply_{company}()` method. + +### Add Telegram command +1. Add case in `TelegramBot._handle_update()` (line ~95) +2. Implement `_handle_{command}_command()` method + +### Modify listing extraction +- InBerlin: Update regex patterns in `InBerlinMonitor.fetch_listings()`. Test against `data/debug_page.html`. +- WGcompany: Update parsing in `WGCompanyMonitor.fetch_listings()`. Test against `data/wgcompany_debug.html`. diff --git a/.gitignore b/.gitignore index e94e366..0017322 100644 --- a/.gitignore +++ b/.gitignore @@ -4,10 +4,12 @@ __pycache__/ *.class *.so .Python +.venv .venv/ venv/ ENV/ env/ +.python-version # Data data/ @@ -24,3 +26,8 @@ data/ # OS .DS_Store Thumbs.db + +# Debug files (these should be in data/ but just in case) +debug_page.html +*.html +!README.md diff --git a/.python-version b/.python-version index b6d8b76..24ee5b1 100644 --- a/.python-version +++ b/.python-version @@ -1 +1 @@ -3.11.8 +3.13 diff --git a/BOTFATHER_COMMANDS.txt b/BOTFATHER_COMMANDS.txt new file mode 100644 index 0000000..3fef6a9 --- /dev/null +++ b/BOTFATHER_COMMANDS.txt @@ -0,0 +1,6 @@ +Copy this to BotFather when setting commands with /setcommands: + +autopilot - Toggle automatic applications (on/off) +status - Show current status and stats +plot - Show weekly listing patterns +help - Show available commands diff --git a/Dockerfile b/Dockerfile index 5542e12..d0e2903 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM mcr.microsoft.com/playwright/python:v1.56.0-jammy +FROM mcr.microsoft.com/playwright/python:v1.57.0-jammy WORKDIR /app diff --git a/README.md b/README.md index 9799f5a..a56500e 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # inberlin-monitor -Monitors [inberlinwohnen.de](https://www.inberlinwohnen.de/wohnungsfinder/) for new apartment listings and sends Telegram notifications. Supports automatic application submission via autopilot mode. +Monitors [inberlinwohnen.de](https://www.inberlinwohnen.de/wohnungsfinder/) and [wgcompany.de](http://www.wgcompany.de) for new apartment/WG listings and sends Telegram notifications. Supports automatic application submission via autopilot mode. ## Features @@ -9,10 +9,22 @@ Monitors [inberlinwohnen.de](https://www.inberlinwohnen.de/wohnungsfinder/) for - 📱 Sends Telegram notifications for new listings with clickable links - 🤖 **Autopilot mode**: Automatically applies to new listings - 📊 **/plot command**: Visualize when listings appear throughout the week -- 🏢 Supports multiple housing companies: HOWOGE, Gewobag, Degewo, Gesobau, Stadt und Land, WBM +- 🏢 Supports 6 housing companies: HOWOGE, Gewobag, Degewo, Gesobau, Stadt und Land, WBM +- 🏠 **WGcompany.de**: Also monitors WG room listings with configurable search filters - 💾 Persists state to detect only truly new listings - 📈 Logs listing times for pattern analysis +## Supported Housing Companies + +| Company | Auto-Apply | Notes | +|---------|------------|-------| +| HOWOGE | ✅ | Direct form submission | +| Gewobag | ✅ | Direct form submission | +| Degewo | ✅ | Via Wohnungshelden portal | +| Gesobau | ✅ | Direct form submission | +| Stadt und Land | ✅ | Direct form submission | +| WBM | ✅ | Direct form submission | + ## Setup ### 1. Create Telegram Bot @@ -80,13 +92,28 @@ cat data/monitor.log | Variable | Description | Default | |----------|-------------|---------| | `FORM_ANREDE` | Salutation (Herr/Frau) | Herr | -| `FORM_VORNAME` | First name | Aron | -| `FORM_NACHNAME` | Last name | Petau | -| `FORM_EMAIL` | Contact email | `aron@petau.net` | -| `FORM_PHONE` | Phone number | 017695773688 | +| `FORM_VORNAME` | First name | - | +| `FORM_NACHNAME` | Last name | - | +| `FORM_EMAIL` | Contact email | - | +| `FORM_PHONE` | Phone number | - | +| `FORM_STRASSE` | Street name | - | +| `FORM_HAUSNUMMER` | House number | - | +| `FORM_PLZ` | Postal code | - | +| `FORM_ORT` | City | Berlin | | `FORM_PERSONS` | Number of persons moving in | 1 | | `FORM_CHILDREN` | Number of children | 0 | -| `FORM_INCOME` | Monthly household net income (€) | 1600 | +| `FORM_INCOME` | Monthly household net income (€) | - | + +### Optional - WGcompany.de Search Filters + +| Variable | Description | Default | +|----------|-------------|---------| +| `WGCOMPANY_ENABLED` | Enable WGcompany monitoring | true | +| `WGCOMPANY_MIN_SIZE` | Minimum room size (m²) | - | +| `WGCOMPANY_MAX_SIZE` | Maximum room size (m²) | - | +| `WGCOMPANY_MIN_PRICE` | Minimum rent (€) | - | +| `WGCOMPANY_MAX_PRICE` | Maximum rent (€) | - | +| `WGCOMPANY_BEZIRK` | District code (0=all) | 0 | ## Without Login @@ -98,10 +125,13 @@ All data is stored in the `./data` directory: | File | Description | |------|-------------| -| `listings.json` | Known listings (for duplicate detection) | +| `listings.json` | Known inberlinwohnen listings | +| `wgcompany_listings.json` | Known WGcompany listings | | `state.json` | Monitor state (autopilot on/off) | | `applications.json` | Record of submitted applications | -| `listing_times.csv` | Timing data for pattern analysis | +| `listing_times.csv` | InBerlin timing data for pattern analysis | +| `wgcompany_times.csv` | WGcompany timing data | | `monitor.log` | Application logs | | `weekly_plot.png` | Generated plot from /plot command | +| `wgcompany_debug.html` | Debug HTML from WGcompany | | `*.png` | Screenshots from application attempts | diff --git a/bot_logo.png b/bot_logo.png new file mode 100644 index 0000000000000000000000000000000000000000..b408f53f652863169319b2b0368d52cfb2b56fd1 GIT binary patch literal 9699 zcmd6Nc_5T++xIy}D1=m!ENxWEk~P9uO4+mTYax5~ZOl+aMJj8uRYUgd*#;G|MKN}U z>|-4a8OD5<`@X;Te%|-{p67l3c>j3+a$Iv>b6)3h9KYZ1IF4&X>1e4kA3Avm0Kk0v zmWnO_G>}OH820}Jom4IW;C$|F6-9mjH%p^IN#=e?JQ1IKO{_*XsK?UVOg-W9;7eDN zlJi{3tIQ{v;+%Qa^w!^ZBRVk$Z{L5}+^MXt+YerF=Jd|R=rSE$)9fX~0 z^$W&!b~{G_B2A^2832`s&=E5k0APLczsOkpz?G4cy?JUCRZU%OoExG6p&y%@HxI0T z6TL_Y`3lg|HVhvI3H5|}8ng)mx{n~s3HT)c^ulqwFHH-e{rLbuT!0er=*-`S>DW}0 zuLvL_06^=|BAhEXNA}>6%rqI{WGxastH3F`%IjM21FS(Cz_2A6^{lvv>m?fid`Dpb zDE(7oJ_QUM0>%Gt5MAzlVzpll+hIE;M$e~2M~`4S?*2(T^aNl%sP`Cct3$EdHU}rZ z#wa$8_?RTm&FN4kA*VBV0pHz&@A9aH?OBFYOmy~N*C?h~7>~ zz#5Z8k8Tm{ZZ~w2dc zqA)P7w|{#W{~550shX6K`J_sRMnnzJvwDYSZNNqDya@uG?oCcUJ%%37nLuWg7lh6! zI)s?i_|oWm`mH}7+>mSFf922bvPLG+`Z_;gb?~LzUleD9v3gr%{epJdeT)X6{U1Q^ z2P}T&d#?)6hbf)>F9`WZ%qTj*s%jq&%lMwq}F;8zT{sY#{NB4UBI1o7r>;Al^Ly$*Hp9aU@ zwZU1Hmd}X*1}Mj0-FFOj)7Std`6$?7;nja2Fma8h`_XL}Yrolyv>XSZyPXvD@YM6_ zdDL+Td+I?G}$L<4R?3J@2;*tdK<{Gf$lplYW2LLV3ZYft6x^%H<6He zWAP7@+v6y08DB1$F&Nv;E(cUJ{4c&2L^J$=BjO$zp$fs<8-VYsKB^G@wuzPX3}W8u z_!mV6PC4)>n$Q&=%Fh5EJt1_FLggXjF`>&K%gA9dEq(l zn}6#7AL1F!1LjPzzaDW}24*hj_!MFoR0wEev_L;|AOvN)t7t(wW;VcZkh7sY%U86 ziDDW*#HYjrQ)Hxh{O|%L@{}BKR5X={+YHVW2!z7FSVI#RGWQUmV{AF$kQD*YSX=ge zD+-`tOqw(Vd2v}*p` zGyfgNef^g&iMo9mw#y+!Y+dh<4#?JS5>G?-t`DdDAeT1TI(p?{p ze*9-H{+$Hx--EXCmUmX1t$?Zj(;D?3F8yi5ar?jl?Lv+my(#5*g*c;4boA2GYG%ZV z8&@33M>}JzP9f7y$~@%(tn{J$Z>fz3A42*A^ltoQKEw3u5daL~SxV%*zRU(sv~0HNxf^V#{- zQ!Db4tJDm1h@=zy2%p0&IdPfika()1D}Ef?|HM&c-HvNWi>8_;@Ye-~B(=Q3ALT|9 zhUqc<%7PJ+TtZN-W)ge{i9&Dovx=b;1-4wYfVFtW`K=+QektMwjBI@vh`ienT&jw8 znYk@0mVYo1p!;xE!!>*5Qb~n~Dd;Qx1JDSO*iMOFUKsG^d8p87$9F*qa_kbF+$VlAA3(3VQFHcO%{l3M!l1oURmu<1 z7)kKa0;a4l%s%U`-I?VR7rYCcCu^q6c_BeVw21tkG@!;ffK)UeD*!UhR$IgE;87H0 zJ{+cEj@3sqWw5jI(*Oj0zW%~I{oFGE%CHovw`fVXyLsB7S_nr5z?a6Jn|VJg;I+a5 zsMX;^I9E{;uV=Qs?Dk*<4Wj@vBTAbHa$ zAwYx6kAx`7V;0}th_X`l+{XJw=eh4Iq@uMTutXLJD!vXeO=c+Ufcs@4_s3@l&G>E*XItuaA z{g8KbdC-zxNvh`@fm@yGxxy%X4y;ea@^4k*zA;!-O2pFoy9*k zn%+kQdg(*+PO{PfN7Y?lo}}LuYs8>qN|2C$FhsG70cR*KKerd;Ir$F3&`Atl;(S_X zS=0rgYkC;?R<2th0DlCbdf6xK#SOo`phtxDrWm!AdTq8*2G&g6@` z=!kw1duZuVxseAMMugBv@7S1nOnI7vo`2a&_Zg(=_vs%l}}i5cwOxpYR)i0R)Ui9uYDCC~xUVrUwO zce6+z@ z+;=?{jGGl)3fq?wno2@&(VLh3Q)2O6icsGGZ1~`&R~GIf81jpYkv6-h_*7uHm~!C} zmn@s&5k&W<;F0#cOcv-d{;w}O5Np;g+QpvWaovYt;E|dsT|TAi>%0?GB7t}(=wY;? zPY;_bg$j-4m$^J z)HB$h2Ixo$E9Ua((1epS@>W;(>BU-?keg{J+U;bw+ZD$Yd5Wu%Re8vbmhtWM3zv9; zwd~;8 zG+Qy+_tZ~1tVR*V86MqmsO;Q+%=gK@n}va14tij15MbLp___A*20ZL0Bv9`UAk5Qc zIax2z0OHZ?GZ&`(qvB&>ji&*tHZ9^!ENq{t#NNRmk_Gi z`Xe-8VuE_w=;AK^Lde2NXtNXf50KepDdHk2cktgokq>Xg8agF68WWLbQX27+HhXnbyH&liYRE6w=oHIoLGTSN8JKw$fi+ zCW6D$?aB0YJ;%Rxc2xOXQ2c0?K@X}A`{Qoi*B!cRV_k>$>-hfHhoTl-JL~r*Tjnna1a*Fwd!5%X zQA)(SA$x;WgS__HyzN>XrvwNIwmu2EJ7P$`O_t@(4L40cX8u>V@7+_!MA=RZ1TU|S zEaEk_b9(WMLkmc9l#%7`*v8#S_wDhyEUN+O8D!uJ@twB4!%0+o zm;0%m?m7Og$yVEmXw@{698Z?PP1Qd4kpAiy8n*F;Q+JD3>xHd$c{#eYk25l37yXJx z0#vCPJ4E-?Ys7jX3mwg!&E>kYzHOA>aA7$=N1>WI#y$U*@_y3YT5Z(y*J5`~3%9k# zdVgGD%2niYi@RLloVol8wLg7W#CL^EIJT1)Mbu6WtlG9mZ4DtG-VVap)A!HZt3!^i zLg&C<9mV}_kC~a-+i5CAi4D4_XIzOuYk111E6As>%aitkq&&>+7hu zZT3@;U$w2oZFSUB{w~YCmct!xrn}foUd#IJ$_jHx1#fB}?{%2zRd}sishKO6h+BF^ zkkccSdJ#DB+*IqIdy9SCyrUjB#JapgJe++O5m>)ECkr>q^5Q_KF4>?9`GQTBv~DzN zJjdxoYIKlV?u_B~c!?WpJ`>(?%XhJ1x7t-vTsA|}UEL3Pucf?lb2A9FeiysJYmXbw z?sevGE1Fpl!0lw?P-%L?i3B_;Vy{tnr8XpqNrV$-oz#Maee5^1NO02WbYEJkywXmU z>OYTjR615;v`BB>|LV3G3I9a((ighcZ8D32S{>@{moB$~MI_nw4yEH+Aij)jt3~D^oM!#9B|W#mClpCKb@`HGOq9VvIClm70B)%U&gUWw%Jb`vGtH86YQ zxfWb{I2UQX%Se}-b?*Gc_yAJy9Zi{OqjlTca)vtfiF)qI`suF?yvi*nj#W^qxPyKQ zP954%g)$PgSZ1WIr?#Y-`?C$UN#_T?ob2}sAz?|f}0 zU)enyKuVi;($dFz^2_;@%zl2+F{oVUkkZr&Rfpj>W%*sMqg7Z*zqQ~W0V^86WcQ4`)>OK-c^$sXTZBk-@u)pl zprVA*wrANH2TkcllGOHk`xa7Z?G$gG0Zd{RmwA%&iW)x+8aC}7otpA_-st-Whx}@EErt3)ZtCek@tKXyTF6O?)>`E+p(<@ z(syY(e$u!p_>4lz>uZU!sHrakmhvX~l4mk|>v_2^l3exKh@TyUbdlcv?-hmBcooaG zmEUm{6-EC}y>gA}GE}V-6`c2}8#&*!mX}-?dtoYHw|LhnVXbw0zIP*eR+~E4GdW43 zZ#~C;)~IMhogtO@I(fEuF>Z@^r9wBs;5+(!p@yGpj^-RvpeXG_bQ3<|68^^V8Uh>} zBDL8|pTS!Y{M5kw*3p^K`fUN_cSe4sPrMG?K30ZT^yRg_Yj2rLmpHz6d@;ZHo01(9 zkiM*y>z7~ucEBS^UfL>G86zM zHqRxd(+!N_mI@X;;sQ5t>>xpx_e24{6m{B&=d36Fp=#l}j8k|K>Y=oxG=C4L&pR9G zS-8*R@$vMBhZASo^^OXe<rP$NXGwS&t#Fw^7okkJbqB8fm((k$Z^iV=X%_<)t;* zv7C1@1uv3XU64A{H_a%$bImU;mPY1sN?j}B0{g4U@l+?0o28FttxNpLCEV)u>v}6u zSRsu5XCeEDVP~edN+8Jrs~;FxflXep4@%}i43)UG>eC1CT#3tTE`BRxF?`6|LG9YX znzFm&-xT1xAEnrh7httMAF<@s;q5hY?3&8PhDqLB3|wM*e6gc7+_d-(p}e9^l?|xn zI6Nh^Pm-eT>wE2(pQ|VcbDO3VEwuUzqWDjd?-Nq49MjXgA&>5m&%I zEo3KJM;2m-#z&D5(YekF&m*lj{KBAWH(EYcC;Qtj{>k$WeN`X#5`+5k4k)>F1F7KF z)NJieBNDr(=yS92;$SJMwSiww&J4d;UG&DX?tG2ejfr|oY1A1j&7x-*d5NU^?!%!` zt|8IxII38@*(1+4U6wAS2QRAvt&i3)C84H+04;?&un=sIi0P0XI(@@{4%_ z7583Oq4=)oT&!zQ4dQA1G5Nh=YMP3zYPDq+i@~9Y)D#!bPYqqDp~mv~=|#cLTlB7D z7v;ufD<+~ZlCpkW(JGPYz+`KcZx1a8Qdc~tab;=;P!jGh4b11{klmD5*LF1W@G*|X zf&I50dzFecy{%!}dh%!}PKToh>01%9vq>JJZD73N)0~>ysVSExnEq|Xjk!h)!=te! z6LmKr9RG5dn$76c7l1pAcAa-~CuPej>tK<_#?!``_fiW+8>$&^wav#944jC?3h8Ze z)DM{jYOV&M0oP?IM(l}WRfRJCl$EP}@05*%%!3v0=bv>QBgn2zeA;oEaS-&PvOeNL zzfg-=pUTUI?dE;%AHL{boE5aQ@_pJ~y~)AJ4_}%17*$|fjT+<}f1FoIim8NKH~100 zgy~i}UB4{vQNY>LL{9h^>I77-?T}?1v@LTb?u%#cbykN;em4~<>>QgB_De>b|9)*! z?^Gbj3cM_fG+G+(FmoaGx0b!qFlnkqQM9jtk<4AC@2R;+|3Cs_S_^eK)0ZFDJ_`4B zvz=%1@PW-K_2ph!-bh!RNo7gA@H-((>WbNd*9AMEXyb=5 zSovyFvYwtL{+6<{dnuKno>K}uA+-k7!j0r7x7CrsB+6PlLu0`g!CX-w{L=Xck=G%mDIfF)+56O zg(nC5rnV(h14~Q8*ez2i@CQg7M;xd1gCw}fz@%7@J6FEYfvQ167EItN!R=gjz7b-2 zI47z&KX7r02p62UW zw_~OOtEc4x&cwSCkd66!bxz^z%da;CCTH>0Zc`D`fQ!U>boo_pMz14`e<TA$7b?io8_zuX!Q zupIM|lB_Ygh4w~zt-X};lD5sMd~cA`gGoIbX8dD&*~-QG9*vNx+h;ynK4l1FZa^jlzvJ zhc?OBipzf52cZEmX(=kdCMtXJxY=l0?wFSReCqVak%<`Xu6uR5Ar5Lw+6H;JR^9C` zHJFImC8XW2ms|6~qz@|gA22<;_$s%rvO0kycdYV8O8M8k$V{-Rv~OBe%t~@dNk2hb zLZ;BI@j;0Gux+Y7f%)R}Ky-xOb-&(n0>fWMK7a>T+Lb-xMWVRd2PERgyLzJ5-gkGghR|vAL_3ir{iSZrE z+tuFgo4U6$iTY6xH(Kr_VEt9A?p;M>`yE0GPWlaVK_^O7Gz2SIjCakkTDlXSX3w&L zk5wD3!S1=)iX4Sm*S0ugM(SyQroyy@I`YLc7NcpZ9*gQzOE!681-Xf+%vfq_sX(3f z%97!h%g9iNzaRgL-@AROU&<@Ff-V$cEw>LE=2&<2t(cKS4KD9xixr7eNvI_*-r-mJ zyh{}c$S#TJm|nP%i`VmYFRLOad|tsiX~=m!IA>!RYqaiqpCqi>FWZVs{CRkAbHy@% z6uBaP`ot3uiL=$|57+fUA|Fa^t!l$Ry=gm=(TvB$k=xrR@vjTJ1cOWZ_ck^cPWFe5 zj>=h8J8eamf0?lqlBqZ5TG!T2>$Tw+DB*ciyGuE5bpL!$(=U`Ps(ELFBd~Wz%Ee^< z>0{R&#^J&c3)eeg*|z^R4-hfubQf z;Rc|Neb7s0Y%fUOiyUuOh#f<@nZm6&6;NI)4P2K5i09+U@z`FoqN)jJjxL@zsa3IE ziE(1K+0cwYnQEx6p^w;oH(M{CXPLh|kmNGIgzFo#$Mj}v*I$n%?9rDSt#ixG*Ax!! zjOM80Q`3U)%lqZTreBGtd)X?eYXP;vlrN1o?!ROpt^{AAI>Rl7tD?n^Ltrt_@pcqi zuzG$6$@tM0OPnH{71P@TtC}GH2ncAYTKWCri&+u1D%5T3dwomEnE1zK&Ae33?Pzmi zzh^69;!F7UTFSwNC6|zi`ZuyZ_?fK3CSGyXD__;O-0_qIjdGP`_3W)^i@|ZTC-DTT z!?M$bmfPB?hpIZ4Ep~-@71nyp3MpT$o0&0tk4WEr&aQ0K*{$lftZzT>GFCO{#|!Dt zT({;gk#%!tHO#F`UCDJ{CZP1(-Dw)AJh9BuYLp+X4tS&uVSy2cTZpE3WUhwZCq4DD zBz5^b=f&NHd)Qgkc~5ZS({h@Rzxi1&x4DR9FI`55*gc;~_0!H#AdN*M*Wx#0`Yr8J zMvc}{X@Se599i|nI(wA+E+zV8rHa);7lPyTCefWk#*2yW%pX5;%?P~f@qAaa>(1EY zvu!K}Z)1FDwwn^6D>iFd&CzL~VpysTzYiRq1bbn+X{H__GTl*0?HFwW0H5K+u=@Xbrtujz;S!r}gVr+ zp3A-@S>MW6AKPcYUW+lf#Q$V{Iyz{*XLDD7cs5j-cmLOzru~10_~Vu2{?-40@p7ID Zhw)ozoY$>chu&s_+c&gS3YDzG{tvhaO}+pC literal 0 HcmV?d00001 diff --git a/debug_page.html b/debug_page.html deleted file mode 100644 index 933dcc9..0000000 --- a/debug_page.html +++ /dev/null @@ -1,3399 +0,0 @@ - - - - - - - - - - - - - - - - - - - -
-
-
- - - - -
-
- -
-
- -
- - - - -
- -
-
- - -
- - -
-
- -
- -
-
-
- -
-
- -
- -
-
-
-
-
-
- - -
-
-
-
- - -
-
- - -
-
-
-
- - -
-
- - Loading... -
- -
- Wir haben 54 Wohnungen für Sie gefunden -
- -
- - - -
-
- - - - Sortierung/Filter: - - -
-
- - - -
- - -
-
-
- - - -
- - -
- - -
-
- - - - -
- Keine Netzwekverbindung. Sie sind offline. -
- - - - - - -
-
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- -
- - -
- - -
-
- - - - - - -
-
- - - - - - -
-
- - - - - - -
-
- - - - - - -
-
- - - - - - -
-
- - - - - - -
-
- - - - - - -
-
- - - - - - -
-
- - - - - - -
-
- - - - - - -
- -
- - - - - - - - -
- -

Änderungen/Irrtümer vorbehalten.
Die genauen Wohnungsdaten entnehmen Sie bitte den jeweiligen Exposés der Wohnungsunternehmen (per Klick auf »Alle Details«). -

-
-
- -
- -
-
-
- - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml index 9fec014..405e11d 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -3,23 +3,7 @@ services: build: . container_name: inberlin-monitor restart: unless-stopped - environment: - # Telegram notifications - - TELEGRAM_BOT_TOKEN=${TELEGRAM_BOT_TOKEN} - - TELEGRAM_CHAT_ID=${TELEGRAM_CHAT_ID} - # inberlinwohnen.de login - - INBERLIN_EMAIL=${INBERLIN_EMAIL} - - INBERLIN_PASSWORD=${INBERLIN_PASSWORD} - # Check interval in seconds (default: 300 = 5 minutes) - - CHECK_INTERVAL=${CHECK_INTERVAL:-300} - # Form data for applications - - FORM_ANREDE=${FORM_ANREDE:-Herr} - - FORM_VORNAME=${FORM_VORNAME:-Aron} - - FORM_NACHNAME=${FORM_NACHNAME:-Petau} - - FORM_EMAIL=${FORM_EMAIL:-aron@petau.net} - - FORM_PHONE=${FORM_PHONE:-017695773688} - - FORM_PERSONS=${FORM_PERSONS:-1} - - FORM_CHILDREN=${FORM_CHILDREN:-0} - - FORM_INCOME=${FORM_INCOME:-1600} + env_file: + - .env volumes: - ./data:/data diff --git a/monitor.py b/monitor.py index d2201b9..cd8dc4f 100644 --- a/monitor.py +++ b/monitor.py @@ -25,15 +25,29 @@ INBERLIN_EMAIL = os.environ.get("INBERLIN_EMAIL", "") INBERLIN_PASSWORD = os.environ.get("INBERLIN_PASSWORD", "") CHECK_INTERVAL = int(os.environ.get("CHECK_INTERVAL", "300")) # seconds (5 minutes) +# WGcompany search configuration +WGCOMPANY_ENABLED = os.environ.get("WGCOMPANY_ENABLED", "true").lower() == "true" +WGCOMPANY_MIN_SIZE = os.environ.get("WGCOMPANY_MIN_SIZE", "") # min room size m² +WGCOMPANY_MAX_SIZE = os.environ.get("WGCOMPANY_MAX_SIZE", "") # max room size m² +WGCOMPANY_MIN_PRICE = os.environ.get("WGCOMPANY_MIN_PRICE", "") # min rent € +WGCOMPANY_MAX_PRICE = os.environ.get("WGCOMPANY_MAX_PRICE", "") # max rent € +WGCOMPANY_BEZIRK = os.environ.get("WGCOMPANY_BEZIRK", "0") # 0=egal, or specific district code +WGCOMPANY_AGE = os.environ.get("WGCOMPANY_AGE", "") # your age (for WG matching) +WGCOMPANY_SMOKER = os.environ.get("WGCOMPANY_SMOKER", "") # NR=Nichtraucher, R=Raucher, empty=egal + # Form data for applications -FORM_ANREDE = os.environ.get("FORM_ANREDE", "Herr") -FORM_VORNAME = os.environ.get("FORM_VORNAME", "Aron") -FORM_NACHNAME = os.environ.get("FORM_NACHNAME", "Petau") -FORM_EMAIL = os.environ.get("FORM_EMAIL", "aron@petau.net") -FORM_PHONE = os.environ.get("FORM_PHONE", "017695773688") +FORM_ANREDE = os.environ.get("FORM_ANREDE", "") +FORM_VORNAME = os.environ.get("FORM_VORNAME", "") +FORM_NACHNAME = os.environ.get("FORM_NACHNAME", "") +FORM_EMAIL = os.environ.get("FORM_EMAIL", "") +FORM_PHONE = os.environ.get("FORM_PHONE", "") +FORM_STRASSE = os.environ.get("FORM_STRASSE", "") +FORM_HAUSNUMMER = os.environ.get("FORM_HAUSNUMMER", "") +FORM_PLZ = os.environ.get("FORM_PLZ", "") +FORM_ORT = os.environ.get("FORM_ORT", "") FORM_PERSONS = os.environ.get("FORM_PERSONS", "1") FORM_CHILDREN = os.environ.get("FORM_CHILDREN", "0") -FORM_INCOME = os.environ.get("FORM_INCOME", "1600") +FORM_INCOME = os.environ.get("FORM_INCOME", "") DATA_DIR = Path("/data") LISTINGS_FILE = DATA_DIR / "listings.json" @@ -42,6 +56,10 @@ TIMING_FILE = DATA_DIR / "listing_times.csv" STATE_FILE = DATA_DIR / "state.json" APPLICATIONS_FILE = DATA_DIR / "applications.json" +# WGcompany specific files +WGCOMPANY_LISTINGS_FILE = DATA_DIR / "wgcompany_listings.json" +WGCOMPANY_TIMING_FILE = DATA_DIR / "wgcompany_times.csv" + # Setup logging logging.basicConfig( level=logging.INFO, @@ -108,8 +126,8 @@ class TelegramBot: self._handle_help_command() elif text == "/plot": self._handle_plot_command() - else: - logger.debug(f"Unknown command: {text}") + elif text.startswith("/"): + self._handle_unknown_command(text) def _handle_autopilot_command(self, text): logger.info(f"Processing autopilot command: {text}") @@ -156,6 +174,10 @@ class TelegramBot: When autopilot is ON, I will automatically apply to new listings.""" self._send_message(help_text) + def _handle_unknown_command(self, text): + cmd = text.split()[0] if text else text + self._send_message(f"❓ Unknown command: {cmd}\n\nUse /help to see available commands.") + def _handle_plot_command(self): """Generate and send a plot of listing times""" logger.info("Generating listing times plot...") @@ -375,12 +397,56 @@ class ApplicationHandler: await asyncio.sleep(1) except: pass - # Look for "Besichtigung vereinbaren" button - logger.info("[HOWOGE] Looking for 'Besichtigung vereinbaren' button...") - apply_btn = await page.query_selector('a:has-text("Besichtigung vereinbaren"), button:has-text("Besichtigung vereinbaren"), a:has-text("Anfragen"), button:has-text("Anfragen")') + # Try to handle consent manager (consentmanager.net) + try: + consent_selectors = [ + '#cmpbntyestxt', '.cmpboxbtnyes', 'a.cmpboxbtn.cmpboxbtnyes', + '#cmpwelcomebtnyes', '.cmptxt_btn_yes' + ] + for sel in consent_selectors: + consent_btn = await page.query_selector(sel) + if consent_btn and await consent_btn.is_visible(): + await consent_btn.click() + logger.info("[HOWOGE] Dismissed consent manager") + await asyncio.sleep(1) + break + except: pass - if apply_btn and await apply_btn.is_visible(): - logger.info("[HOWOGE] Found application button, clicking...") + # Look for "Besichtigung vereinbaren" button + # HOWOGE has multiple buttons with same text - only one is visible + logger.info("[HOWOGE] Looking for 'Besichtigung vereinbaren' button...") + + # Use href selector - more reliable than text matching + selectors = [ + 'a[href*="besichtigung-vereinbaren"]', + 'a:has-text("Besichtigung vereinbaren")', + 'button:has-text("Besichtigung vereinbaren")', + 'a:has-text("Anfragen")', + 'button:has-text("Anfragen")' + ] + + apply_btn = None + for sel in selectors: + all_btns = await page.query_selector_all(sel) + logger.info(f"[HOWOGE] Selector '{sel}' found {len(all_btns)} matches") + # Find first visible button + for btn in all_btns: + try: + if await btn.is_visible(): + apply_btn = btn + logger.info(f"[HOWOGE] Found visible button with selector '{sel}'") + break + except: + pass + if apply_btn: + break + + if apply_btn: + # Scroll the button into view and click + logger.info("[HOWOGE] Found application button, scrolling into view...") + await apply_btn.scroll_into_view_if_needed() + await asyncio.sleep(0.5) + logger.info("[HOWOGE] Clicking button...") await apply_btn.click() await asyncio.sleep(3) await page.wait_for_load_state("networkidle") @@ -520,6 +586,11 @@ class ApplicationHandler: return result async def _apply_degewo(self, listing: dict, result: dict) -> dict: + """ + Degewo uses Wohnungshelden (app.wohnungshelden.de) for their application system. + The application form is loaded in an iframe from a different domain. + We need to navigate directly to the iframe URL or interact with the iframe. + """ page = await self.context.new_page() try: logger.info(f"[DEGEWO] Opening page: {listing['link']}") @@ -527,6 +598,7 @@ class ApplicationHandler: logger.info("[DEGEWO] Page loaded") await asyncio.sleep(2) + # Dismiss cookie banner try: cookie_btn = await page.query_selector('button:has-text("Alle akzeptieren"), #CybotCookiebotDialogBodyLevelButtonLevelOptinAllowAll') if cookie_btn and await cookie_btn.is_visible(): @@ -542,155 +614,186 @@ class ApplicationHandler: await apply_btn.click() await asyncio.sleep(3) - # Fill out the contact form - logger.info("[DEGEWO] Filling out contact form...") + # Degewo uses Wohnungshelden iframe for the application form + # Find the iframe and get its URL to navigate directly + iframe_element = await page.query_selector('iframe[src*="wohnungshelden.de"]') + if iframe_element: + iframe_url = await iframe_element.get_attribute('src') + logger.info(f"[DEGEWO] Found Wohnungshelden iframe: {iframe_url}") - # Anrede - select from env - try: - anrede_select = await page.query_selector('select[name*="anrede"], select[name*="salutation"], select[id*="anrede"]') - if anrede_select: - await anrede_select.select_option(label=FORM_ANREDE) - logger.info(f"[DEGEWO] Selected Anrede: {FORM_ANREDE}") - else: - # Try radio button - anrede_radio = await page.query_selector(f'input[type="radio"][value="{FORM_ANREDE}"], label:has-text("{FORM_ANREDE}") input[type="radio"]') - if anrede_radio: - await anrede_radio.click() - logger.info(f"[DEGEWO] Clicked Anrede radio: {FORM_ANREDE}") - except Exception as e: - logger.warning(f"[DEGEWO] Could not set Anrede: {e}") + # Navigate to the iframe URL directly in a new page for full access + iframe_page = await self.context.new_page() + try: + await iframe_page.goto(iframe_url, wait_until="networkidle") + await asyncio.sleep(2) + logger.info("[DEGEWO] Loaded Wohnungshelden application page") - # Vorname - try: - vorname_field = await page.query_selector('input[name*="vorname"], input[name*="firstname"], input[id*="vorname"], input[placeholder*="Vorname"]') - if vorname_field: - await vorname_field.fill(FORM_VORNAME) - logger.info(f"[DEGEWO] Filled Vorname: {FORM_VORNAME}") - except Exception as e: - logger.warning(f"[DEGEWO] Could not fill Vorname: {e}") + # Take screenshot of the Wohnungshelden form + screenshot_path = DATA_DIR / f"degewo_wohnungshelden_{listing['id']}.png" + await iframe_page.screenshot(path=str(screenshot_path), full_page=True) + logger.info(f"[DEGEWO] Saved Wohnungshelden screenshot to {screenshot_path}") - # Nachname - try: - nachname_field = await page.query_selector('input[name*="nachname"], input[name*="lastname"], input[id*="nachname"], input[placeholder*="Nachname"]') - if nachname_field: - await nachname_field.fill(FORM_NACHNAME) - logger.info(f"[DEGEWO] Filled Nachname: {FORM_NACHNAME}") - except Exception as e: - logger.warning(f"[DEGEWO] Could not fill Nachname: {e}") + # Save HTML for debugging + html_content = await iframe_page.content() + html_path = DATA_DIR / f"degewo_wohnungshelden_{listing['id']}.html" + with open(html_path, 'w', encoding='utf-8') as f: + f.write(html_content) + logger.info(f"[DEGEWO] Saved HTML to {html_path}") - # E-Mail - try: - email_field = await page.query_selector('input[type="email"], input[name*="email"], input[name*="mail"], input[id*="email"]') - if email_field: - await email_field.fill(FORM_EMAIL) - logger.info(f"[DEGEWO] Filled E-Mail: {FORM_EMAIL}") - except Exception as e: - logger.warning(f"[DEGEWO] Could not fill E-Mail: {e}") + # Fill out Wohnungshelden form + # The form uses specific IDs: #firstName, #lastName, #email, etc. + form_filled = False - # Telefonnummer - try: - tel_field = await page.query_selector('input[type="tel"], input[name*="telefon"], input[name*="phone"], input[id*="telefon"]') - if tel_field: - await tel_field.fill(FORM_PHONE) - logger.info(f"[DEGEWO] Filled Telefonnummer: {FORM_PHONE}") - except Exception as e: - logger.warning(f"[DEGEWO] Could not handle Telefon: {e}") + # Anrede (Salutation) - ng-select dropdown + try: + # Click on the salutation dropdown to open it + salutation_dropdown = await iframe_page.query_selector('#salutation-dropdown, ng-select[id*="salutation"]') + if salutation_dropdown: + await salutation_dropdown.click() + await asyncio.sleep(0.5) + # Select "Herr" or "Frau" based on FORM_ANREDE + anrede_option = await iframe_page.query_selector(f'.ng-option:has-text("{FORM_ANREDE}")') + if anrede_option: + await anrede_option.click() + logger.info(f"[DEGEWO] Selected Anrede: {FORM_ANREDE}") + form_filled = True + except Exception as e: + logger.warning(f"[DEGEWO] Could not set Anrede: {e}") - # Anzahl einziehende Personen - try: - personen_field = await page.query_selector('input[name*="personen"], input[name*="persons"], input[id*="personen"], select[name*="personen"]') - if personen_field: - tag_name = await personen_field.evaluate("el => el.tagName.toLowerCase()") - if tag_name == "select": - await personen_field.select_option(FORM_PERSONS) - else: - await personen_field.fill(FORM_PERSONS) - logger.info(f"[DEGEWO] Set Anzahl Personen: {FORM_PERSONS}") - except Exception as e: - logger.warning(f"[DEGEWO] Could not set Personen: {e}") + # Vorname (First name) + try: + vorname_field = await iframe_page.query_selector('#firstName') + if vorname_field: + await vorname_field.fill(FORM_VORNAME) + logger.info(f"[DEGEWO] Filled Vorname: {FORM_VORNAME}") + form_filled = True + except Exception as e: + logger.warning(f"[DEGEWO] Could not fill Vorname: {e}") - # davon Anzahl Kinder - try: - kinder_field = await page.query_selector('input[name*="kinder"], input[name*="children"], input[id*="kinder"], select[name*="kinder"]') - if kinder_field: - tag_name = await kinder_field.evaluate("el => el.tagName.toLowerCase()") - if tag_name == "select": - await kinder_field.select_option(FORM_CHILDREN) - else: - await kinder_field.fill(FORM_CHILDREN) - logger.info(f"[DEGEWO] Set Anzahl Kinder: {FORM_CHILDREN}") - except Exception as e: - logger.warning(f"[DEGEWO] Could not set Kinder: {e}") + # Nachname (Last name) + try: + nachname_field = await iframe_page.query_selector('#lastName') + if nachname_field: + await nachname_field.fill(FORM_NACHNAME) + logger.info(f"[DEGEWO] Filled Nachname: {FORM_NACHNAME}") + form_filled = True + except Exception as e: + logger.warning(f"[DEGEWO] Could not fill Nachname: {e}") - # Monatliches Haushaltsnettoeinkommen - try: - einkommen_field = await page.query_selector('input[name*="einkommen"], input[name*="income"], input[id*="einkommen"], select[name*="einkommen"]') - if einkommen_field: - tag_name = await einkommen_field.evaluate("el => el.tagName.toLowerCase()") - if tag_name == "select": - # Try to select by value or index - try: - await einkommen_field.select_option(FORM_INCOME) - except: - # Fallback to first non-empty option - options = await einkommen_field.query_selector_all("option") - if len(options) > 1: - await einkommen_field.select_option(index=1) - else: - await einkommen_field.fill(FORM_INCOME) - logger.info(f"[DEGEWO] Set Einkommen: {FORM_INCOME}") - except Exception as e: - logger.warning(f"[DEGEWO] Could not set Einkommen: {e}") + # E-Mail + try: + email_field = await iframe_page.query_selector('#email') + if email_field: + await email_field.fill(FORM_EMAIL) + logger.info(f"[DEGEWO] Filled E-Mail: {FORM_EMAIL}") + form_filled = True + except Exception as e: + logger.warning(f"[DEGEWO] Could not fill E-Mail: {e}") - # "Für mich selbst" selection - try: - selbst_radio = await page.query_selector('input[type="radio"][value*="selbst"], input[type="radio"][value*="myself"], label:has-text("Für mich selbst") input') - if selbst_radio: - await selbst_radio.click() - logger.info("[DEGEWO] Selected: Für mich selbst") - except Exception as e: - logger.warning(f"[DEGEWO] Could not set 'Für mich selbst': {e}") + # Telefonnummer + try: + tel_field = await iframe_page.query_selector('input[id*="telefonnummer"]') + if tel_field: + await tel_field.fill(FORM_PHONE) + logger.info(f"[DEGEWO] Filled Telefon: {FORM_PHONE}") + form_filled = True + except Exception as e: + logger.warning(f"[DEGEWO] Could not fill Telefon: {e}") - # Accept data privacy checkbox - try: - checkbox = await page.query_selector('input[type="checkbox"][name*="datenschutz"], input[type="checkbox"][name*="privacy"], input[type="checkbox"][name*="consent"]') - if checkbox and not await checkbox.is_checked(): - await checkbox.click() - logger.info("[DEGEWO] Checked privacy/consent checkbox") - except Exception as e: - logger.warning(f"[DEGEWO] Could not check consent: {e}") + # Anzahl einziehende Personen + try: + personen_field = await iframe_page.query_selector('input[id*="numberPersonsTotal"]') + if personen_field: + await personen_field.fill(FORM_PERSONS) + logger.info(f"[DEGEWO] Filled Anzahl Personen: {FORM_PERSONS}") + form_filled = True + except Exception as e: + logger.warning(f"[DEGEWO] Could not fill Anzahl Personen: {e}") - await asyncio.sleep(1) + # "Für sich selbst" dropdown + try: + selbst_dropdown = await iframe_page.query_selector('ng-select[id*="fuer_wen"]') + if selbst_dropdown: + await selbst_dropdown.click() + await asyncio.sleep(0.5) + # Select "Für mich selbst" + selbst_option = await iframe_page.query_selector('.ng-option:has-text("Für mich selbst"), .ng-option:has-text("selbst")') + if selbst_option: + await selbst_option.click() + logger.info("[DEGEWO] Selected: Für mich selbst") + form_filled = True + except Exception as e: + logger.warning(f"[DEGEWO] Could not set 'Für sich selbst': {e}") - # Take screenshot before submitting - screenshot_path = DATA_DIR / f"degewo_form_{listing['id']}.png" - await page.screenshot(path=str(screenshot_path), full_page=True) - logger.info(f"[DEGEWO] Saved form screenshot to {screenshot_path}") + await asyncio.sleep(1) - # Submit the form - try: - submit_btn = await page.query_selector('button[type="submit"], input[type="submit"], button:has-text("Absenden"), button:has-text("Senden")') - if submit_btn and await submit_btn.is_visible(): - await submit_btn.click() - logger.info("[DEGEWO] Clicked submit button") - await asyncio.sleep(3) + # Take screenshot after filling form + screenshot_path = DATA_DIR / f"degewo_form_filled_{listing['id']}.png" + await iframe_page.screenshot(path=str(screenshot_path), full_page=True) + logger.info(f"[DEGEWO] Saved filled form screenshot to {screenshot_path}") - # Take screenshot after submission - screenshot_path = DATA_DIR / f"degewo_submitted_{listing['id']}.png" - await page.screenshot(path=str(screenshot_path), full_page=True) - logger.info(f"[DEGEWO] Saved submission screenshot to {screenshot_path}") + # Try to submit + try: + # Look for submit button with various patterns + submit_selectors = [ + 'button[type="submit"]', + 'input[type="submit"]', + 'button:has-text("Absenden")', + 'button:has-text("Senden")', + 'button:has-text("Anfrage")', + 'button:has-text("Bewerben")', + 'button:has-text("Submit")', + '.btn-primary', + '.submit-btn', + ] - result["success"] = True - result["message"] = "Application submitted" - else: - result["success"] = True - result["message"] = "Form filled, submit button not found" - logger.warning("[DEGEWO] Submit button not found") - except Exception as e: - result["success"] = True - result["message"] = f"Form filled, submit error: {str(e)}" - logger.warning(f"[DEGEWO] Submit error: {e}") + submit_btn = None + for selector in submit_selectors: + submit_btn = await iframe_page.query_selector(selector) + if submit_btn and await submit_btn.is_visible(): + logger.info(f"[DEGEWO] Found submit button with selector: {selector}") + break + submit_btn = None + if submit_btn: + await submit_btn.click() + logger.info("[DEGEWO] Clicked submit button") + await asyncio.sleep(3) + + # Take screenshot after submission + screenshot_path = DATA_DIR / f"degewo_submitted_{listing['id']}.png" + await iframe_page.screenshot(path=str(screenshot_path), full_page=True) + logger.info(f"[DEGEWO] Saved submission screenshot to {screenshot_path}") + + result["success"] = True + result["message"] = "Application submitted via Wohnungshelden" + else: + # Submit button not found - this is a failure + result["success"] = False + result["message"] = "Wohnungshelden form loaded but submit button not found" + logger.warning("[DEGEWO] Submit button not found in Wohnungshelden form") + except Exception as e: + result["success"] = False + result["message"] = f"Wohnungshelden submit error: {str(e)}" + logger.warning(f"[DEGEWO] Submit error: {e}") + finally: + await iframe_page.close() + else: + # No iframe found - try the old approach (fallback for different page structure) + logger.warning("[DEGEWO] Wohnungshelden iframe not found, trying direct form...") + + # Take screenshot for debugging + screenshot_path = DATA_DIR / f"degewo_noiframe_{listing['id']}.png" + await page.screenshot(path=str(screenshot_path), full_page=True) + + # Save HTML for debugging + html_content = await page.content() + html_path = DATA_DIR / "degewo_debug.html" + with open(html_path, 'w', encoding='utf-8') as f: + f.write(html_content) + + result["success"] = False + result["message"] = "Wohnungshelden iframe not found on page" else: result["message"] = "No kontaktieren button found" logger.warning("[DEGEWO] Could not find kontaktieren button") @@ -762,22 +865,138 @@ class ApplicationHandler: await asyncio.sleep(1) except: pass - logger.info("[STADTUNDLAND] Looking for application button...") - apply_btn = await page.query_selector('a:has-text("Anfragen"), button:has-text("Bewerben"), a:has-text("Interesse")') - if apply_btn and await apply_btn.is_visible(): - logger.info("[STADTUNDLAND] Found application button, clicking...") - await apply_btn.click() - await asyncio.sleep(2) + # Stadt und Land has the contact form directly on the page + logger.info("[STADTUNDLAND] Looking for contact form fields...") - screenshot_path = DATA_DIR / f"stadtundland_{listing['id']}.png" - await page.screenshot(path=str(screenshot_path)) - logger.info(f"[STADTUNDLAND] Saved screenshot to {screenshot_path}") + form_filled = False - result["success"] = True - result["message"] = "Application page opened" + # Fill Vorname + try: + vorname_field = await page.query_selector('input[name*="vorname" i], input[placeholder*="Vorname" i], input#vorname') + if vorname_field: + await vorname_field.fill(FORM_VORNAME) + logger.info(f"[STADTUNDLAND] Filled Vorname: {FORM_VORNAME}") + form_filled = True + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not fill Vorname: {e}") + + # Fill Nachname + try: + nachname_field = await page.query_selector('input[name*="nachname" i], input[placeholder*="Nachname" i], input#nachname') + if nachname_field: + await nachname_field.fill(FORM_NACHNAME) + logger.info(f"[STADTUNDLAND] Filled Nachname: {FORM_NACHNAME}") + form_filled = True + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not fill Nachname: {e}") + + # Fill Telefonnummer + try: + tel_field = await page.query_selector('input[name*="telefon" i], input[type="tel"], input[placeholder*="Telefon" i]') + if tel_field: + await tel_field.fill(FORM_PHONE) + logger.info(f"[STADTUNDLAND] Filled Telefon: {FORM_PHONE}") + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not fill Telefon: {e}") + + # Fill E-Mail + try: + email_field = await page.query_selector('input[type="email"], input[name*="email" i], input[name*="mail" i]') + if email_field: + await email_field.fill(FORM_EMAIL) + logger.info(f"[STADTUNDLAND] Filled E-Mail: {FORM_EMAIL}") + form_filled = True + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not fill E-Mail: {e}") + + # Fill Straße (street) + try: + strasse_field = await page.query_selector('input[name*="strasse" i], input[name*="straße" i], input[placeholder*="Straße" i], input#strasse') + if strasse_field and FORM_STRASSE: + await strasse_field.fill(FORM_STRASSE) + logger.info(f"[STADTUNDLAND] Filled Straße: {FORM_STRASSE}") + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not fill Straße: {e}") + + # Fill Hausnummer + try: + hausnummer_field = await page.query_selector('input[name*="hausnummer" i], input[name*="hausnr" i], input[placeholder*="Hausnummer" i], input#hausnummer') + if hausnummer_field and FORM_HAUSNUMMER: + await hausnummer_field.fill(FORM_HAUSNUMMER) + logger.info(f"[STADTUNDLAND] Filled Hausnummer: {FORM_HAUSNUMMER}") + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not fill Hausnummer: {e}") + + # Fill PLZ + try: + plz_field = await page.query_selector('input[name*="plz" i], input[placeholder*="PLZ" i], input#plz') + if plz_field and FORM_PLZ: + await plz_field.fill(FORM_PLZ) + logger.info(f"[STADTUNDLAND] Filled PLZ: {FORM_PLZ}") + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not fill PLZ: {e}") + + # Fill Ort (city) + try: + ort_field = await page.query_selector('input[name*="ort" i], input[placeholder*="Ort" i], input#ort') + if ort_field and FORM_ORT: + await ort_field.fill(FORM_ORT) + logger.info(f"[STADTUNDLAND] Filled Ort: {FORM_ORT}") + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not fill Ort: {e}") + + # Check Datenschutz checkbox + try: + datenschutz_checkbox = await page.query_selector('input[type="checkbox"][name*="datenschutz" i], input[type="checkbox"][name*="privacy" i]') + if datenschutz_checkbox and not await datenschutz_checkbox.is_checked(): + await datenschutz_checkbox.click() + logger.info("[STADTUNDLAND] Checked Datenschutz checkbox") + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not check Datenschutz: {e}") + + # Check Provision checkbox + try: + provision_checkbox = await page.query_selector('input[type="checkbox"][name*="provision" i]') + if provision_checkbox and not await provision_checkbox.is_checked(): + await provision_checkbox.click() + logger.info("[STADTUNDLAND] Checked Provision checkbox") + except Exception as e: + logger.warning(f"[STADTUNDLAND] Could not check Provision: {e}") + + await asyncio.sleep(1) + + # Screenshot before submitting + screenshot_path = DATA_DIR / f"stadtundland_form_{listing['id']}.png" + await page.screenshot(path=str(screenshot_path), full_page=True) + logger.info(f"[STADTUNDLAND] Saved form screenshot to {screenshot_path}") + + if form_filled: + # Submit the form - look for submit button + try: + submit_btn = await page.query_selector('button[type="submit"], input[type="submit"], button:has-text("prüfen"), button:has-text("Absenden"), button:has-text("Senden")') + if submit_btn and await submit_btn.is_visible(): + await submit_btn.click() + logger.info("[STADTUNDLAND] Clicked submit button") + await asyncio.sleep(3) + + # Screenshot after submission + screenshot_path = DATA_DIR / f"stadtundland_submitted_{listing['id']}.png" + await page.screenshot(path=str(screenshot_path), full_page=True) + logger.info(f"[STADTUNDLAND] Saved submission screenshot to {screenshot_path}") + + result["success"] = True + result["message"] = "Application submitted" + else: + result["success"] = True + result["message"] = "Form filled, submit button not found" + logger.warning("[STADTUNDLAND] Submit button not found") + except Exception as e: + result["success"] = True + result["message"] = f"Form filled, submit error: {str(e)}" + logger.warning(f"[STADTUNDLAND] Submit error: {e}") else: - result["message"] = "No application button found" - logger.warning("[STADTUNDLAND] Could not find application button") + result["message"] = "No form fields found" + logger.warning("[STADTUNDLAND] Could not find form fields") screenshot_path = DATA_DIR / f"stadtundland_nobtn_{listing['id']}.png" await page.screenshot(path=str(screenshot_path)) except Exception as e: @@ -1284,26 +1503,331 @@ class InBerlinMonitor: return await self.apply_to_listings(listings) +class WGCompanyMonitor: + """Monitor WGcompany.de for new WG room listings""" + + def __init__(self): + self.browser = None + self.context = None + + async def init_browser(self): + """Initialize Playwright browser""" + if self.browser is None: + self.playwright = await async_playwright().start() + self.browser = await self.playwright.chromium.launch(headless=True) + self.context = await self.browser.new_context( + user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36" + ) + logger.info("[WGCOMPANY] Browser initialized") + + async def fetch_listings(self) -> list[dict]: + """Fetch WG listings from wgcompany.de search""" + listings = [] + + try: + page = await self.context.new_page() + + # Use simple search page: st=1 (Berlin), mi=10 (simple WG search), li=100 + search_url = "http://www.wgcompany.de/cgi-bin/seite?st=1&mi=10&li=100" + logger.info(f"[WGCOMPANY] Loading search page: {search_url}") + await page.goto(search_url, wait_until="networkidle") + await asyncio.sleep(2) + + # Fill search form - field names from simple search: + # c = Min. Größe (min size m²) + # a = Max. Miete (max rent €) + # l = Alter (age) + # e = Bezirk (district select) + + # Min size field + if WGCOMPANY_MIN_SIZE: + min_size_field = await page.query_selector('input[name="c"]') + if min_size_field: + await min_size_field.fill(WGCOMPANY_MIN_SIZE) + logger.info(f"[WGCOMPANY] Set min size: {WGCOMPANY_MIN_SIZE} m²") + + # Max rent field + if WGCOMPANY_MAX_PRICE: + max_price_field = await page.query_selector('input[name="a"]') + if max_price_field: + await max_price_field.fill(WGCOMPANY_MAX_PRICE) + logger.info(f"[WGCOMPANY] Set max rent: {WGCOMPANY_MAX_PRICE} €") + + # Age field (l = Alter) + if WGCOMPANY_AGE: + age_field = await page.query_selector('input[name="l"]') + if age_field: + await age_field.fill(WGCOMPANY_AGE) + logger.info(f"[WGCOMPANY] Set age: {WGCOMPANY_AGE}") + + # Smoker filter (o = RaucherIn: NR=Nichtraucher, R=Raucher) + if WGCOMPANY_SMOKER: + smoker_select = await page.query_selector('select[name="o"]') + if smoker_select: + await smoker_select.select_option(WGCOMPANY_SMOKER) + logger.info(f"[WGCOMPANY] Set smoker: {WGCOMPANY_SMOKER}") + + # District selection (e = Bezirk, multi-select) + # Leave as default "egal" (all districts) unless specified + if WGCOMPANY_BEZIRK and WGCOMPANY_BEZIRK != "0": + bezirk_select = await page.query_selector('select[name="e"]') + if bezirk_select: + await bezirk_select.select_option(WGCOMPANY_BEZIRK) + logger.info(f"[WGCOMPANY] Set district: {WGCOMPANY_BEZIRK}") + + # Submit the search form + submit_btn = await page.query_selector('input[type="submit"][value*="finde"], input[type="submit"]') + if submit_btn: + logger.info("[WGCOMPANY] Submitting search form...") + await submit_btn.click() + await page.wait_for_load_state("networkidle") + await asyncio.sleep(2) + + # Get results page content + content = await page.content() + + # Save debug HTML + debug_path = DATA_DIR / "wgcompany_debug.html" + with open(debug_path, "w", encoding="utf-8") as f: + f.write(content) + logger.info(f"[WGCOMPANY] Saved debug HTML to {debug_path}") + + # Parse listings from the results page + # WGcompany results typically have tables with room info + # Look for listing links and extract data + + # Pattern to find listing detail links + # Format: wg.pl?...function=wgzeigen... with room details in table rows + listing_links = await page.query_selector_all('a[href*="wg.pl"][href*="wgzeigen"]') + logger.info(f"[WGCOMPANY] Found {len(listing_links)} listing links") + + for link_elem in listing_links: + try: + href = await link_elem.get_attribute("href") + if not href: + continue + + # Get surrounding text/row for listing details + parent = await link_elem.evaluate_handle("el => el.closest('tr') || el.parentElement") + row_text = await parent.evaluate("el => el.innerText") if parent else "" + + # Extract price from row text (e.g., "350 €" or "350€") + price_match = re.search(r'(\d+)\s*€', row_text) + price = price_match.group(1) + " €" if price_match else "?" + + # Extract size (e.g., "15 m²" or "15m²") + size_match = re.search(r'(\d+)\s*m²', row_text) + size = size_match.group(1) + " m²" if size_match else "?" + + # Extract district/location + # Common Berlin districts in text + bezirk_patterns = [ + "Kreuzberg", "Neukölln", "Friedrichshain", "Prenzlauer Berg", + "Mitte", "Wedding", "Charlottenburg", "Schöneberg", "Tempelhof", + "Steglitz", "Wilmersdorf", "Pankow", "Lichtenberg", "Treptow", + "Köpenick", "Reinickendorf", "Spandau", "Zehlendorf", "Moabit" + ] + location = "Berlin" + for bez in bezirk_patterns: + if bez.lower() in row_text.lower(): + location = bez + break + + # Make absolute URL + if not href.startswith("http"): + href = f"http://www.wgcompany.de{href}" if href.startswith("/") else f"http://www.wgcompany.de/cgi-bin/{href}" + + # Generate unique ID from link and key details + listing_id = hashlib.md5(f"{href}{price}{size}".encode()).hexdigest()[:12] + + listings.append({ + "id": listing_id, + "rooms": "1 Zimmer (WG)", + "size": size, + "price": price, + "address": location, + "link": href, + "source": "wgcompany", + "fetched_at": datetime.now().isoformat() + }) + except Exception as e: + logger.debug(f"[WGCOMPANY] Error parsing listing: {e}") + continue + + # Deduplicate by id + seen_ids = set() + unique_listings = [] + for listing in listings: + if listing["id"] not in seen_ids: + seen_ids.add(listing["id"]) + unique_listings.append(listing) + listings = unique_listings + + await page.close() + logger.info(f"[WGCOMPANY] Fetched {len(listings)} unique listings") + return listings + + except Exception as e: + logger.error(f"[WGCOMPANY] Error fetching listings: {e}") + import traceback + logger.error(traceback.format_exc()) + return [] + + def load_previous_listings(self) -> dict: + """Load previously saved WGcompany listings""" + if WGCOMPANY_LISTINGS_FILE.exists(): + with open(WGCOMPANY_LISTINGS_FILE, "r") as f: + return json.load(f) + return {} + + def save_listings(self, listings: list[dict]): + """Save current WGcompany listings""" + listings_dict = {l["id"]: l for l in listings} + with open(WGCOMPANY_LISTINGS_FILE, "w") as f: + json.dump(listings_dict, f, indent=2, ensure_ascii=False) + + def find_new_listings(self, current: list[dict], previous: dict) -> list[dict]: + """Find listings that are new since last check""" + new = [] + for listing in current: + if listing["id"] not in previous: + new.append(listing) + return new + + def send_telegram(self, message: str): + """Send notification via Telegram""" + if not TELEGRAM_BOT_TOKEN or not TELEGRAM_CHAT_ID: + logger.warning("[WGCOMPANY] Telegram not configured, skipping notification") + return + + try: + url = f"https://api.telegram.org/bot{TELEGRAM_BOT_TOKEN}/sendMessage" + data = { + "chat_id": TELEGRAM_CHAT_ID, + "text": message, + "parse_mode": "HTML", + "disable_web_page_preview": True + } + response = requests.post(url, data=data) + if response.ok: + logger.info("[WGCOMPANY] Telegram notification sent") + else: + logger.error(f"[WGCOMPANY] Telegram error: {response.text}") + except Exception as e: + logger.error(f"[WGCOMPANY] Telegram error: {e}") + + def log_listing_times(self, new_listings: list[dict]): + """Log new WGcompany listing appearance times to CSV""" + if not new_listings: + return + + file_exists = WGCOMPANY_TIMING_FILE.exists() + + with open(WGCOMPANY_TIMING_FILE, "a", newline="", encoding="utf-8") as f: + writer = csv.writer(f) + if not file_exists: + writer.writerow(["timestamp", "weekday", "hour", "minute", "rooms", "size", "price", "address", "listing_id"]) + + now = datetime.now() + for listing in new_listings: + writer.writerow([ + now.isoformat(), + now.strftime("%A"), + now.hour, + now.minute, + listing["rooms"], + listing["size"], + listing["price"], + listing["address"], + listing["id"] + ]) + + logger.info(f"[WGCOMPANY] Logged {len(new_listings)} listing times to CSV") + + def notify_new_listings(self, new_listings: list[dict]): + """Send individual notification for each new WGcompany listing""" + if not new_listings: + return + + for listing in new_listings: + message = f"🏠 Neues WG-Zimmer! (WGcompany)\n\n" + message += f"🚪 {listing['rooms']}\n" + message += f"📐 {listing['size']}\n" + message += f"💰 {listing['price']}\n" + message += f"📍 {listing['address']}\n\n" + message += f"👉 Zum Angebot" + + self.send_telegram(message) + time.sleep(0.5) + + def check(self): + """Run a single check for new WGcompany listings""" + logger.info("[WGCOMPANY] Starting check...") + + # Fetch current listings + current_listings = asyncio.get_event_loop().run_until_complete(self._async_fetch()) + if not current_listings: + logger.warning("[WGCOMPANY] No listings fetched") + return + + # Load previous listings + previous_listings = self.load_previous_listings() + + # First run - just save baseline + if not previous_listings: + logger.info(f"[WGCOMPANY] First run - saving {len(current_listings)} listings as baseline") + self.save_listings(current_listings) + return + + # Find new listings + new_listings = self.find_new_listings(current_listings, previous_listings) + + if new_listings: + logger.info(f"[WGCOMPANY] Found {len(new_listings)} new listing(s)") + self.log_listing_times(new_listings) + self.notify_new_listings(new_listings) + else: + logger.info("[WGCOMPANY] No new listings") + + # Save current state + self.save_listings(current_listings) + + async def _async_fetch(self): + await self.init_browser() + return await self.fetch_listings() + + def main(): """Main entry point""" # Ensure data directory exists DATA_DIR.mkdir(parents=True, exist_ok=True) - monitor = InBerlinMonitor() + # Initialize monitors + inberlin_monitor = InBerlinMonitor() + wgcompany_monitor = WGCompanyMonitor() if WGCOMPANY_ENABLED else None # Start Telegram command listener - telegram_bot = TelegramBot(monitor) + telegram_bot = TelegramBot(inberlin_monitor) telegram_bot.start() - logger.info(f"inberlin-monitor started (interval: {CHECK_INTERVAL}s)") - logger.info(f"Autopilot: {'ENABLED' if monitor.is_autopilot_enabled() else 'DISABLED'}") + logger.info(f"Monitor started (interval: {CHECK_INTERVAL}s)") + logger.info(f"InBerlin Autopilot: {'ENABLED' if inberlin_monitor.is_autopilot_enabled() else 'DISABLED'}") + logger.info(f"WGcompany: {'ENABLED' if WGCOMPANY_ENABLED else 'DISABLED'}") while True: + # Check InBerlinWohnen try: - monitor.check() + inberlin_monitor.check() except Exception as e: - logger.error(f"Check failed: {e}") + logger.error(f"InBerlin check failed: {e}") + + # Check WGcompany + if wgcompany_monitor: + try: + wgcompany_monitor.check() + except Exception as e: + logger.error(f"WGcompany check failed: {e}") time.sleep(CHECK_INTERVAL) diff --git a/requirements.txt b/requirements.txt index 6303c2e..2a0a493 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ requests>=2.31.0 -playwright>=1.49.0 +playwright>=1.57.0 matplotlib>=3.8.0 pandas>=2.0.0