From b4e84e430e7ef53d438ebdb219916542a2cbccd1 Mon Sep 17 00:00:00 2001 From: Aron Date: Mon, 12 Jan 2026 16:46:58 +0100 Subject: [PATCH] fix session expired and try variant timing --- .env.example | 6 ++- application_handler.py | 9 ++++ main.py | 48 ++++++++++++++++--- telegram_bot.py | 9 +++- tests/test_randomized_timing.py | 66 +++++++++++++++++++++++++ tests/test_session_validation.py | 82 ++++++++++++++++++++++++++++++++ 6 files changed, 211 insertions(+), 9 deletions(-) create mode 100644 tests/test_randomized_timing.py create mode 100644 tests/test_session_validation.py diff --git a/.env.example b/.env.example index bd1e076..d86a3ba 100644 --- a/.env.example +++ b/.env.example @@ -7,8 +7,10 @@ TELEGRAM_CHAT_ID=your_chat_id_here INBERLIN_EMAIL=your_email@example.com INBERLIN_PASSWORD=your_password_here -# Check interval in seconds (default: 300 = 5 minutes) -CHECK_INTERVAL=300 +# Check interval in seconds (default: 150 = 2.5 minutes) +# Randomized by ±CHECK_INTERVAL_VARIANCE to avoid bot detection +CHECK_INTERVAL=150 +CHECK_INTERVAL_VARIANCE=30 # Form Data for Autopilot Applications FORM_ANREDE=Herr diff --git a/application_handler.py b/application_handler.py index a4e36a4..9d394c8 100644 --- a/application_handler.py +++ b/application_handler.py @@ -699,9 +699,11 @@ Total listings tracked: {total_listings} # Attempt login if not already logged in if not self.state_manager.logged_in: + logger.info("Not logged in - attempting login") login_success = await self.login(page) if login_success: self.state_manager.logged_in = True + logger.info("Login successful - session established") else: logger.warning("Login failed. Proceeding with public listings.") @@ -752,6 +754,10 @@ Total listings tracked: {total_listings} logger.info(f"Collected content from {page_num} page(s)") content = all_content + # Session validation: if logged in but only got 1 page, session may be expired + if self.state_manager.logged_in and page_num == 1: + logger.warning("Logged in but only 1 page fetched - possible session expiration") + # Debug: save HTML to file for inspection debug_path = DATA_DIR / "debug_page.html" with open(debug_path, "w", encoding="utf-8") as f: @@ -827,6 +833,9 @@ Total listings tracked: {total_listings} if not listings: logger.warning("No listings parsed") + # Additional check: if we have deeplinks but no parsed listings, something went wrong + if len(id_to_link) > 0: + logger.warning(f"Found {len(id_to_link)} deeplinks but parsed 0 listings - extraction issue") await page.close() logger.info(f"Fetched {len(listings)} listings") diff --git a/main.py b/main.py index 2503f84..333a745 100644 --- a/main.py +++ b/main.py @@ -12,6 +12,7 @@ from pathlib import Path from autoclean_debug import autoclean_debug_material from datetime import datetime, timezone import time +import random # --- Environment & Logging Setup --- @@ -46,7 +47,15 @@ logger = logging.getLogger(__name__) # Use named logger logger.info("Bot starting | Logs: data/monitor.log + console") # Interval (seconds) between checks for new listings -CHECK_INTERVAL = int(os.getenv("CHECK_INTERVAL", 300)) # Default: 300 seconds +CHECK_INTERVAL = int(os.getenv("CHECK_INTERVAL", 150)) # Default: 150 seconds (2.5 min) +CHECK_INTERVAL_VARIANCE = int(os.getenv("CHECK_INTERVAL_VARIANCE", 30)) # Default: ±30 seconds randomization + +def get_randomized_interval() -> int: + """Get check interval with random variance to avoid bot detection patterns.""" + variance = random.randint(-CHECK_INTERVAL_VARIANCE, CHECK_INTERVAL_VARIANCE) + interval = CHECK_INTERVAL + variance + # Ensure minimum of 60 seconds to avoid excessive load + return max(60, interval) def validate_config() -> bool: @@ -139,9 +148,11 @@ async def main() -> None: try: - logger.info(f"Bot is now running. Refreshing every {CHECK_INTERVAL} seconds...") + logger.info(f"Bot is now running. Refreshing every {CHECK_INTERVAL}±{CHECK_INTERVAL_VARIANCE}s (randomized)...") last_clean = 0 CLEAN_INTERVAL = 48 * 3600 # 48 hours in seconds + zero_listings_count = 0 # Track consecutive zero-listing fetches + MAX_ZERO_LISTINGS = 3 # Re-login after 3 consecutive zero fetches while True: now = asyncio.get_event_loop().time() # Autoclean debug material every 48 hours @@ -158,11 +169,30 @@ async def main() -> None: # Check if monitoring is enabled before fetching listings if not state_manager.is_monitoring_enabled(): logger.debug("Monitoring is paused, skipping listing check") - await asyncio.sleep(CHECK_INTERVAL) + sleep_interval = get_randomized_interval() + await asyncio.sleep(sleep_interval) _flush_rotating_file_handlers() continue current_listings = await app_handler.fetch_listings() + + # Session validation: detect when listings fetch returns empty repeatedly + if not current_listings or len(current_listings) == 0: + zero_listings_count += 1 + logger.warning(f"Zero listings fetched ({zero_listings_count}/{MAX_ZERO_LISTINGS})") + + if zero_listings_count >= MAX_ZERO_LISTINGS: + logger.warning("Session likely expired - forcing re-login") + state_manager.set_logged_in(False) + zero_listings_count = 0 + await asyncio.sleep(5) + continue + else: + # Reset counter on successful fetch + if zero_listings_count > 0: + logger.info(f"Session recovered - listings fetched successfully") + zero_listings_count = 0 + except Exception as e: logger.error(f"💥 Browser crash: {e}") logger.info("Recovering...") @@ -186,7 +216,9 @@ async def main() -> None: if not current_listings: logger.warning("No listings fetched") - await asyncio.sleep(CHECK_INTERVAL) + sleep_interval = get_randomized_interval() + logger.debug(f"Sleeping for {sleep_interval}s (randomized)") + await asyncio.sleep(sleep_interval) _flush_rotating_file_handlers() continue previous_listings = app_handler.load_previous_listings() @@ -208,7 +240,9 @@ async def main() -> None: } app_handler.save_application(result) app_handler.save_listings(current_listings) - await asyncio.sleep(CHECK_INTERVAL) + sleep_interval = get_randomized_interval() + logger.debug(f"Sleeping for {sleep_interval}s (randomized)") + await asyncio.sleep(sleep_interval) _flush_rotating_file_handlers() continue new_listings = app_handler.find_new_listings(current_listings, previous_listings) @@ -221,7 +255,9 @@ async def main() -> None: application_results = await app_handler.apply_to_listings(new_listings) app_handler.notify_new_listings(new_listings, application_results) app_handler.save_listings(current_listings) - await asyncio.sleep(CHECK_INTERVAL) + sleep_interval = get_randomized_interval() + logger.debug(f"Next check in {sleep_interval}s") + await asyncio.sleep(sleep_interval) _flush_rotating_file_handlers() except (KeyboardInterrupt, SystemExit): logger.info("Shutting down...") diff --git a/telegram_bot.py b/telegram_bot.py index db79271..450259c 100644 --- a/telegram_bot.py +++ b/telegram_bot.py @@ -198,9 +198,16 @@ class TelegramBot: self.last_update_id = update["update_id"] self._handle_update(update) except requests.exceptions.Timeout: + logger.debug("Telegram polling timeout (normal - will retry)") continue + except requests.exceptions.ConnectionError as e: + logger.warning(f"Telegram connection error (will retry): {str(e)[:100]}") + time.sleep(5) + except requests.exceptions.RequestException as e: + logger.error(f"Telegram request error: {type(e).__name__}: {str(e)[:100]}") + time.sleep(5) except Exception as e: - logger.error(f"Telegram polling error: {e}") + logger.error(f"Telegram polling error: {type(e).__name__}: {str(e)[:100]}") time.sleep(5) def _handle_update(self, update: dict) -> None: diff --git a/tests/test_randomized_timing.py b/tests/test_randomized_timing.py new file mode 100644 index 0000000..4a2101e --- /dev/null +++ b/tests/test_randomized_timing.py @@ -0,0 +1,66 @@ +"""Tests for randomized timing functionality""" +import pytest +import random + + +def get_randomized_interval(base_interval: int = 150, variance: int = 30) -> int: + """Get check interval with random variance to avoid bot detection patterns.""" + variance_value = random.randint(-variance, variance) + interval = base_interval + variance_value + # Ensure minimum of 60 seconds to avoid excessive load + return max(60, interval) + + +class TestRandomizedTiming: + """Test randomized check interval functionality""" + + def test_randomized_interval_within_bounds(self): + """Test that randomized interval stays within expected bounds""" + # Test multiple intervals to ensure randomization + intervals = [get_randomized_interval(150, 30) for _ in range(100)] + + # All intervals should be between 120 and 180 (150 ± 30) + assert all(120 <= i <= 180 for i in intervals), f"Some intervals outside bounds" + + # Should have some variance (not all the same) + assert len(set(intervals)) > 1, "No randomization detected - all intervals are identical" + + def test_minimum_interval_enforced(self): + """Test that minimum 60s interval is enforced even with high variance""" + # Test multiple times with extreme variance + intervals = [get_randomized_interval(90, 50) for _ in range(100)] + + # All intervals should be >= 60 + assert all(i >= 60 for i in intervals), f"Some intervals below 60s minimum: {min(intervals)}" + + def test_zero_variance_possible(self): + """Test that variance can be disabled by setting to 0""" + # All intervals should be exactly 150 + intervals = [get_randomized_interval(150, 0) for _ in range(20)] + assert all(i == 150 for i in intervals), "Variance=0 should produce constant interval" + + def test_interval_distribution(self): + """Test that randomization produces reasonable distribution""" + # Generate many samples + intervals = [get_randomized_interval(150, 30) for _ in range(1000)] + + avg = sum(intervals) / len(intervals) + + # Average should be close to 150 (within 5 seconds) + assert 145 <= avg <= 155, f"Average {avg} too far from expected 150" + + # Should have good spread (at least 20 different values) + assert len(set(intervals)) >= 20, "Not enough variance in intervals" + + def test_custom_parameters(self): + """Test with various custom base intervals and variances""" + test_cases = [ + (120, 20, 100, 140), # base, variance, min_expected, max_expected + (180, 40, 140, 220), + (200, 10, 190, 210), + ] + + for base, variance, min_exp, max_exp in test_cases: + intervals = [get_randomized_interval(base, variance) for _ in range(50)] + assert all(min_exp <= i <= max_exp for i in intervals), \ + f"Failed for base={base}, variance={variance}" diff --git a/tests/test_session_validation.py b/tests/test_session_validation.py new file mode 100644 index 0000000..3a24f49 --- /dev/null +++ b/tests/test_session_validation.py @@ -0,0 +1,82 @@ +"""Tests for session validation and re-login logic""" +import pytest +from unittest.mock import AsyncMock, MagicMock, patch +from pathlib import Path +import sys +import os + +# Add parent directory to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from state_manager import StateManager + + +class TestSessionValidation: + """Test session validation and automatic re-login""" + + @pytest.fixture + def state_manager(self, tmp_path): + """Create a StateManager with a temporary state file""" + state_file = tmp_path / "state.json" + return StateManager(state_file) + + def test_logged_in_status_toggle(self, state_manager): + """Test that logged_in status can be toggled""" + assert state_manager.is_logged_in() is False + + state_manager.set_logged_in(True) + assert state_manager.is_logged_in() is True + + state_manager.set_logged_in(False) + assert state_manager.is_logged_in() is False + + def test_logged_in_persistence_is_memory_only(self, state_manager, tmp_path): + """Test that logged_in status is memory-only (not persisted to disk)""" + state_manager.set_logged_in(True) + assert state_manager.is_logged_in() is True + + # Create new state manager with same file - should start as False + new_state_manager = StateManager(tmp_path / "state.json") + assert new_state_manager.is_logged_in() is False + + def test_zero_listings_counter_increments(self): + """Test that zero_listings_count increments when no listings are fetched""" + # This would be tested in integration tests with the main loop + # For now, we test the logic conceptually + zero_listings_count = 0 + max_zero = 3 + + # Simulate 3 consecutive zero fetches + for _ in range(3): + current_listings = [] # Empty result + if not current_listings: + zero_listings_count += 1 + + assert zero_listings_count == max_zero + + def test_zero_listings_counter_resets_on_success(self): + """Test that counter resets when listings are successfully fetched""" + zero_listings_count = 2 + + # Simulate successful fetch + current_listings = [{"id": "test123", "rooms": "2"}] + if current_listings and len(current_listings) > 0: + zero_listings_count = 0 + + assert zero_listings_count == 0 + + def test_session_validation_logic(self, state_manager): + """Test the session validation workflow""" + # Start logged in + state_manager.set_logged_in(True) + assert state_manager.is_logged_in() is True + + # Simulate session expiration detection (zero listings threshold reached) + zero_listings_count = 3 + max_zero = 3 + + if zero_listings_count >= max_zero: + state_manager.set_logged_in(False) + + # Verify session was reset + assert state_manager.is_logged_in() is False