This commit is contained in:
Aron Petau 2026-01-01 15:27:25 +01:00
parent d596ed7e19
commit aa6626d80d
21 changed files with 1051 additions and 333 deletions

119
main.py
View file

@ -20,41 +20,94 @@ load_dotenv()
# Configure logging: file (rotating) + console for Docker visibility, enforce for all modules
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s - %(levelname)s - %(message)s",
format="%(asctime)s [%(levelname)-5s] %(name)-20s | %(message)s",
handlers=[
RotatingFileHandler("data/monitor.log", maxBytes=1 * 1024 * 1024, backupCount=5), # 1 MB per file, 5 backups
RotatingFileHandler("data/monitor.log", maxBytes=1 * 1024 * 1024, backupCount=3), # 1 MB per file, 3 backups
logging.StreamHandler()
],
force=True # Enforce for all modules, Python 3.8+
)
logger = logging.getLogger() # Use root logger for universal logging
logger.info("Logging initialized: outputting to both data/monitor.log and console (Docker logs)")
logger = logging.getLogger(__name__) # Use named logger
logger.info("🚀 Bot starting | Logs: data/monitor.log + console")
# Interval (seconds) between checks for new listings.
# Taken from the CHECK_INTERVAL environment variable when it is set; int()
# raises ValueError when this line runs if that value is not a valid integer.
CHECK_INTERVAL = int(os.getenv("CHECK_INTERVAL", 300)) # Default: 300 seconds
def _flush_rotating_file_handlers():
def validate_config() -> bool:
    """Validate environment configuration on startup and log every problem found.

    Three groups of environment variables are checked:

    * ``TELEGRAM_BOT_TOKEN`` and ``TELEGRAM_CHAT_ID`` -- a missing value is
      an error and makes validation fail.
    * ``INBERLIN_EMAIL`` and ``INBERLIN_PASSWORD`` -- a missing value only
      produces a warning.
    * The ``FORM_*`` fields -- all missing names are reported together in a
      single warning.

    A variable counts as missing when it is unset, empty, or consists solely
    of whitespace (a blank value is never a usable token, address or name).

    Returns:
        ``True`` when no errors were found (warnings do not affect the
        result), ``False`` otherwise.
    """

    def is_missing(name: str) -> bool:
        # Treat blank / whitespace-only values the same as an unset variable.
        value = os.getenv(name)
        return value is None or not value.strip()

    # Missing values here are fatal: the caller is told not to start.
    fatal_checks = (
        ("TELEGRAM_BOT_TOKEN", "TELEGRAM_BOT_TOKEN is not set - notifications will not work"),
        ("TELEGRAM_CHAT_ID", "TELEGRAM_CHAT_ID is not set - notifications will not work"),
    )
    # Missing values here are tolerated and only produce a warning.
    advisory_checks = (
        ("INBERLIN_EMAIL", "INBERLIN_EMAIL is not set - will use public listings only"),
        ("INBERLIN_PASSWORD", "INBERLIN_PASSWORD is not set - will use public listings only"),
    )
    # Variables used for auto-apply form filling.
    form_fields = (
        "FORM_ANREDE", "FORM_VORNAME", "FORM_NACHNAME", "FORM_EMAIL",
        "FORM_PHONE", "FORM_STRASSE", "FORM_HAUSNUMMER", "FORM_PLZ",
        "FORM_ORT", "FORM_PERSONS", "FORM_CHILDREN", "FORM_INCOME",
    )

    error_messages: list[str] = [msg for name, msg in fatal_checks if is_missing(name)]
    warning_messages: list[str] = [msg for name, msg in advisory_checks if is_missing(name)]

    missing_form_fields = [field for field in form_fields if is_missing(field)]
    if missing_form_fields:
        warning_messages.append(
            f"Form fields not set: {', '.join(missing_form_fields)} - autopilot may fail"
        )

    # Warnings are logged first so they are visible even when errors follow.
    if warning_messages:
        logger.warning("Configuration warnings:")
        for message in warning_messages:
            logger.warning(" - %s", message)

    if error_messages:
        logger.error("Configuration errors - bot cannot start:")
        for message in error_messages:
            logger.error(" - %s", message)
        logger.error("Please set required environment variables in .env file")
        return False

    logger.info("Configuration validated successfully")
    return True
def _flush_rotating_file_handlers() -> None:
    """Force each RotatingFileHandler on the root logger to write out its buffer.

    Handlers of any other type attached to the root logger are left untouched.
    """
    rotating_handlers = (
        candidate
        for candidate in logging.getLogger().handlers
        if isinstance(candidate, RotatingFileHandler)
    )
    for file_handler in rotating_handlers:
        file_handler.flush()
async def main():
logger.info("Starting the bot...")
# Initialize state manager
state_manager = StateManager(Path("data/state.json"))
# --- Playwright browser/context setup ---
async def init_browser_context() -> tuple:
    """Start Playwright, launch headless Chromium and open a browser context.

    If launching the browser or creating the context fails, everything that
    was already started is shut down again before the exception propagates.
    Without that, each failed attempt would leave a Playwright driver (and
    possibly a browser) running, which adds up when the caller retries.

    Returns:
        A ``(playwright, browser, browser_context)`` tuple. The caller owns
        all three and is responsible for closing them.

    Raises:
        Exception: whatever Playwright raised while starting, launching or
            creating the context; it is re-raised unchanged after cleanup.
    """
    playwright = await async_playwright().start()
    browser = None
    try:
        browser = await playwright.chromium.launch(headless=True)
        browser_context = await browser.new_context(
            user_agent="Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
        )
    except Exception:
        # Best-effort teardown of the partially built session. Each step is
        # guarded on its own so one failing cleanup cannot skip the other,
        # and the original error is the one the caller sees.
        if browser is not None:
            try:
                await browser.close()
            except Exception as cleanup_error:
                logger.debug(f"Ignoring browser cleanup error: {cleanup_error}")
        try:
            await playwright.stop()
        except Exception as cleanup_error:
            logger.debug(f"Ignoring playwright cleanup error: {cleanup_error}")
        raise
    logger.info("Playwright browser context initialized.")
    return playwright, browser, browser_context
async def main() -> None:
logger.info("🤖 Initializing wohn-bot...")
# Validate configuration before starting
if not validate_config():
return
# Initialize state manager
state_manager = StateManager(Path("data/state.json"))
# --- Playwright browser/context setup with recovery ---
logger.info("🌐 Initializing browser...")
playwright, browser, browser_context = await init_browser_context()
# Application handler manages browser/context
app_handler = ApplicationHandler(browser_context, state_manager)
@ -78,23 +131,45 @@ async def main():
now = asyncio.get_event_loop().time()
# Autoclean debug material every 48 hours
if now - last_clean > CLEAN_INTERVAL:
logger.info("Running autoclean_debug_material (48h interval)...")
try:
deleted = autoclean_debug_material()
logger.info(f"Autocleaned {len(deleted)} debug files.")
if deleted:
logger.info(f"🧹 Cleaned {len(deleted)} debug files (48h)")
except Exception as e:
logger.warning(f"Autoclean failed: {e}")
logger.warning(f"⚠️ Autoclean failed: {e}")
last_clean = now
current_listings = await app_handler.fetch_listings()
try:
current_listings = await app_handler.fetch_listings()
except Exception as e:
logger.error(f"💥 Browser crash: {e}")
logger.info("🔄 Recovering...")
try:
await browser.close()
await playwright.stop()
except:
pass
# Reinitialize browser
try:
playwright, browser, browser_context = await init_browser_context()
app_handler.context = browser_context
logger.info("✅ Browser recovered")
await asyncio.sleep(5)
continue
except Exception as recovery_error:
logger.error(f"Failed to recover: {recovery_error}")
await asyncio.sleep(60)
continue
if not current_listings:
logger.warning("No listings fetched")
logger.warning("⚠️ No listings fetched")
await asyncio.sleep(CHECK_INTERVAL)
_flush_rotating_file_handlers()
continue
previous_listings = app_handler.load_previous_listings()
if not previous_listings:
logger.info(f"First run - saving {len(current_listings)} listings as baseline and marking as failed applications")
logger.info(f"🎬 First run: saving {len(current_listings)} listings as baseline")
# Mark all as failed applications so /retryfailed can be used
for listing in current_listings:
result = {
@ -117,10 +192,10 @@ async def main():
new_listings = app_handler.find_new_listings(current_listings, previous_listings)
application_results = {}
if new_listings:
logger.info(f"Found {len(new_listings)} new listing(s)")
logger.info(f"\ud83c\udfe0 {len(new_listings)} new listing{'s' if len(new_listings) > 1 else ''} detected")
app_handler.log_listing_times(new_listings)
if app_handler.is_autopilot_enabled():
logger.info("Autopilot enabled - applying to listings...")
logger.info("\ud83e\udd16 Autopilot active - applying...")
application_results = await app_handler.apply_to_listings(new_listings)
app_handler.notify_new_listings(new_listings, application_results)
app_handler.save_listings(current_listings)