internal logic error
This commit is contained in:
parent
044ef111ac
commit
cc40121e46
2 changed files with 141 additions and 7 deletions
|
|
@ -2,6 +2,7 @@ import asyncio
|
|||
import logging
|
||||
import hashlib
|
||||
import re
|
||||
import csv
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
import json
|
||||
|
|
@ -14,6 +15,7 @@ logger = logging.getLogger(__name__)
|
|||
|
||||
WGCOMPANY_LISTINGS_FILE = Path("data/wgcompany_listings.json")
|
||||
WGCOMPANY_TIMING_FILE = Path("data/wgcompany_times.csv")
|
||||
CONTACTS_FILE = Path("data/contacts.csv")
|
||||
|
||||
# Environment variables for search filters
|
||||
WGCOMPANY_MIN_SIZE = os.environ.get("WGCOMPANY_MIN_SIZE", "")
|
||||
|
|
@ -157,10 +159,76 @@ class WGCompanyNotifier:
|
|||
logger.info(f"[WG] {len(new)} new listing{'s' if len(new) > 1 else ''} detected")
|
||||
return new
|
||||
|
||||
async def fetch_listing_details(self, listing_url: str) -> dict:
    """Fetch detail fields for one listing from its detail page.

    Opens ``listing_url`` in a new page of ``self.context`` and scans the
    page HTML for an e-mail address and the visible body text for an
    address and a contact name. The WG name is taken from the ``wg=``
    query parameter of the URL itself.

    This is best-effort: any failure is logged and whatever was collected
    up to that point is returned; the method does not raise.

    Args:
        listing_url: URL of the listing detail page.

    Returns:
        A dict with the keys ``email``, ``contact_person``, ``address``,
        ``description`` and ``wg_name``. Values are ``""`` when nothing
        was found; ``description`` is not populated by this method.
    """
    details = {
        "email": "",
        "contact_person": "",
        "address": "",
        "description": "",
        "wg_name": "",
    }

    def first_group(patterns, haystack, flags=0):
        """Return group 1 of the first pattern that matches, else None."""
        for pattern in patterns:
            match = re.search(pattern, haystack, flags)
            if match:
                return match.group(1)
        return None

    # The WG name only depends on the URL, so extract it before any network
    # access; it then survives a failed page load.
    wg_match = re.search(r'wg=([^&]+)', listing_url)
    if wg_match:
        details["wg_name"] = wg_match.group(1)

    page = None
    try:
        # Explicit check instead of `assert`, which is stripped under -O.
        if self.context is None:
            raise RuntimeError("Browser context not initialized")

        page = await self.context.new_page()
        await page.goto(listing_url, wait_until="networkidle")
        await asyncio.sleep(1)

        content = await page.content()

        # Prefer an address that is explicitly labelled "E-Mail:"/"Email:",
        # then fall back to any e-mail-shaped token in the HTML.
        email = first_group(
            [
                r'[Ee]-?[Mm]ail[:\s]+([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})',
                r'([a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,})',
            ],
            content,
        )
        if email:
            details["email"] = email

        # Address and contact name are searched in the rendered text, not
        # the raw HTML.
        text = await page.inner_text('body')

        address = first_group(
            [
                r'((?:[A-ZÄÖÜ][a-zäöüß]+(?:straße|str\.|platz|weg|allee))\s*\d+)',
                r'Adresse[:\s]+([^\n]+)',
                r'Lage[:\s]+([^\n]+)',
            ],
            text,
            re.IGNORECASE,
        )
        if address:
            details["address"] = address.strip()

        contact = first_group(
            [
                r'Kontakt[:\s]+([A-ZÄÖÜ][a-zäöüß]+(?:\s+[A-ZÄÖÜ][a-zäöüß]+)?)',
                r'Ansprechpartner[:\s]+([A-ZÄÖÜ][a-zäöüß]+(?:\s+[A-ZÄÖÜ][a-zäöüß]+)?)',
            ],
            text,
        )
        if contact:
            details["contact_person"] = contact.strip()

        logger.debug(f"[WG] Fetched details: email={details['email']}, wg={details['wg_name']}")

    except Exception as e:
        logger.error(f"[WG] Error fetching listing details: {e}")

    finally:
        # Always release the page, including after a failed navigation;
        # otherwise every failed fetch leaves a page open in the context.
        if page is not None:
            try:
                await page.close()
            except Exception as e:
                logger.debug(f"[WG] Error closing listing page: {e}")

    return details
|
||||
|
||||
def log_listing_times(self, new_listings):
|
||||
if not new_listings:
|
||||
return
|
||||
import csv
|
||||
file_exists = WGCOMPANY_TIMING_FILE.exists()
|
||||
with open(WGCOMPANY_TIMING_FILE, "a", newline="", encoding="utf-8") as f:
|
||||
writer = csv.writer(f)
|
||||
|
|
@ -181,6 +249,65 @@ class WGCompanyNotifier:
|
|||
])
|
||||
logger.debug(f"[WG] Logged {len(new_listings)} to CSV")
|
||||
|
||||
async def save_to_contacts(self, listing: dict, details: dict) -> None:
    """Append a new listing to the contacts CSV unless its link is already there.

    Reads ``CONTACTS_FILE`` (if it has content) to collect the existing
    ``ListingLink`` values, skips the listing when its link is among them,
    and otherwise appends one row. The header row is written when the file
    is missing or empty. Errors are logged and not raised.

    Args:
        listing: Listing record; ``link`` is required, and ``size``,
            ``rooms``, ``price`` and ``address`` are used when present.
        details: Detail fields for the listing; the keys ``wg_name``,
            ``contact_person``, ``address`` and ``email`` are read.
    """
    fieldnames = [
        'Name', 'ContactPerson', 'Platform', 'DateContacted', 'Address',
        'ListingLink', 'ContactMethod', 'Response', 'FollowUpDate',
        'PreferredMoveIn', 'Notes', 'Status',
    ]

    try:
        link = listing['link']

        # An existing but empty file still needs a header; without one the
        # first data row would later be read as the header and duplicate
        # detection would stop working.
        needs_header = (
            not CONTACTS_FILE.exists() or CONTACTS_FILE.stat().st_size == 0
        )

        # Read existing contacts to avoid duplicates.
        if not needs_header:
            with open(CONTACTS_FILE, 'r', newline='', encoding='utf-8') as f:
                existing_urls = {
                    row['ListingLink']
                    for row in csv.DictReader(f)
                    if row.get('ListingLink')
                }
            if link in existing_urls:
                logger.debug(f"[WG] Listing already in contacts: {link}")
                return

        wg_name = details.get('wg_name', '')
        email = details.get('email', '')

        row = {
            'Name': f"WG {wg_name}" if wg_name else "WG (unnamed)",
            'ContactPerson': details.get('contact_person', ''),
            'Platform': 'WGcompany',
            'DateContacted': '',  # Empty - user will fill when contacting
            # Prefer the address scraped from the detail page, fall back to
            # the one from the listing overview.
            'Address': details.get('address', '') or listing.get('address', ''),
            'ListingLink': link,
            'ContactMethod': f"email: {email}" if email else 'wgcompany message',
            'Response': '',
            'FollowUpDate': '',
            'PreferredMoveIn': '',
            # Combine room info with listing details for Notes.
            'Notes': f"{listing.get('size', '')} / {listing.get('rooms', '')}; {listing.get('price', '')}",
            'Status': 'open',
        }

        # Opening in append mode fails if the directory is missing.
        CONTACTS_FILE.parent.mkdir(parents=True, exist_ok=True)

        with open(CONTACTS_FILE, 'a', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            if needs_header:
                writer.writeheader()
            writer.writerow(row)

        # `details` carries email="" when none was found, so a .get()
        # default would never apply; fall back explicitly.
        logger.info(f"[WG] Saved to contacts.csv: {row['Name']} - {email or 'no email'}")

    except Exception as e:
        logger.error(f"[WG] Error saving to contacts: {e}")
|
||||
|
||||
async def notify_new_listings(self, new_listings: list[dict]) -> None:
|
||||
if not new_listings or not self.telegram_bot:
|
||||
return
|
||||
|
|
@ -210,6 +337,13 @@ class WGCompanyNotifier:
|
|||
if new_listings:
|
||||
logger.info(f"[WGCOMPANY] Found {len(new_listings)} new listing(s)")
|
||||
self.log_listing_times(new_listings)
|
||||
|
||||
# Fetch details and save to contacts for each new listing
|
||||
for listing in new_listings:
|
||||
details = await self.fetch_listing_details(listing['link'])
|
||||
await self.save_to_contacts(listing, details)
|
||||
await asyncio.sleep(1) # Be polite with requests
|
||||
|
||||
await self.notify_new_listings(new_listings)
|
||||
else:
|
||||
logger.info("[WGCOMPANY] No new listings")
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue