fix session expired and try variant timing
This commit is contained in:
parent
867c6f3152
commit
b4e84e430e
6 changed files with 211 additions and 9 deletions
|
|
@ -7,8 +7,10 @@ TELEGRAM_CHAT_ID=your_chat_id_here
|
|||
INBERLIN_EMAIL=your_email@example.com
|
||||
INBERLIN_PASSWORD=your_password_here
|
||||
|
||||
# Check interval in seconds (default: 300 = 5 minutes)
|
||||
CHECK_INTERVAL=300
|
||||
# Check interval in seconds (default: 150 = 2.5 minutes)
|
||||
# Randomized by ±CHECK_INTERVAL_VARIANCE to avoid bot detection
|
||||
CHECK_INTERVAL=150
|
||||
CHECK_INTERVAL_VARIANCE=30
|
||||
|
||||
# Form Data for Autopilot Applications
|
||||
FORM_ANREDE=Herr
|
||||
|
|
|
|||
|
|
@ -699,9 +699,11 @@ Total listings tracked: {total_listings}
|
|||
|
||||
# Attempt login if not already logged in
|
||||
if not self.state_manager.logged_in:
|
||||
logger.info("Not logged in - attempting login")
|
||||
login_success = await self.login(page)
|
||||
if login_success:
|
||||
self.state_manager.logged_in = True
|
||||
logger.info("Login successful - session established")
|
||||
else:
|
||||
logger.warning("Login failed. Proceeding with public listings.")
|
||||
|
||||
|
|
@ -752,6 +754,10 @@ Total listings tracked: {total_listings}
|
|||
logger.info(f"Collected content from {page_num} page(s)")
|
||||
content = all_content
|
||||
|
||||
# Session validation: if logged in but only got 1 page, session may be expired
|
||||
if self.state_manager.logged_in and page_num == 1:
|
||||
logger.warning("Logged in but only 1 page fetched - possible session expiration")
|
||||
|
||||
# Debug: save HTML to file for inspection
|
||||
debug_path = DATA_DIR / "debug_page.html"
|
||||
with open(debug_path, "w", encoding="utf-8") as f:
|
||||
|
|
@ -827,6 +833,9 @@ Total listings tracked: {total_listings}
|
|||
|
||||
if not listings:
|
||||
logger.warning("No listings parsed")
|
||||
# Additional check: if we have deeplinks but no parsed listings, something went wrong
|
||||
if len(id_to_link) > 0:
|
||||
logger.warning(f"Found {len(id_to_link)} deeplinks but parsed 0 listings - extraction issue")
|
||||
|
||||
await page.close()
|
||||
logger.info(f"Fetched {len(listings)} listings")
|
||||
|
|
|
|||
48
main.py
48
main.py
|
|
@ -12,6 +12,7 @@ from pathlib import Path
|
|||
from autoclean_debug import autoclean_debug_material
|
||||
from datetime import datetime, timezone
|
||||
import time
|
||||
import random
|
||||
|
||||
|
||||
# --- Environment & Logging Setup ---
|
||||
|
|
@ -46,7 +47,15 @@ logger = logging.getLogger(__name__) # Use named logger
|
|||
logger.info("Bot starting | Logs: data/monitor.log + console")
|
||||
|
||||
# Interval (seconds) between checks for new listings
|
||||
CHECK_INTERVAL = int(os.getenv("CHECK_INTERVAL", 300)) # Default: 300 seconds
|
||||
CHECK_INTERVAL = int(os.getenv("CHECK_INTERVAL", 150)) # Default: 150 seconds (2.5 min)
|
||||
CHECK_INTERVAL_VARIANCE = int(os.getenv("CHECK_INTERVAL_VARIANCE", 30)) # Default: ±30 seconds randomization
|
||||
|
||||
def get_randomized_interval() -> int:
|
||||
"""Get check interval with random variance to avoid bot detection patterns."""
|
||||
variance = random.randint(-CHECK_INTERVAL_VARIANCE, CHECK_INTERVAL_VARIANCE)
|
||||
interval = CHECK_INTERVAL + variance
|
||||
# Ensure minimum of 60 seconds to avoid excessive load
|
||||
return max(60, interval)
|
||||
|
||||
|
||||
def validate_config() -> bool:
|
||||
|
|
@ -139,9 +148,11 @@ async def main() -> None:
|
|||
|
||||
|
||||
try:
|
||||
logger.info(f"Bot is now running. Refreshing every {CHECK_INTERVAL} seconds...")
|
||||
logger.info(f"Bot is now running. Refreshing every {CHECK_INTERVAL}±{CHECK_INTERVAL_VARIANCE}s (randomized)...")
|
||||
last_clean = 0
|
||||
CLEAN_INTERVAL = 48 * 3600 # 48 hours in seconds
|
||||
zero_listings_count = 0 # Track consecutive zero-listing fetches
|
||||
MAX_ZERO_LISTINGS = 3 # Re-login after 3 consecutive zero fetches
|
||||
while True:
|
||||
now = asyncio.get_event_loop().time()
|
||||
# Autoclean debug material every 48 hours
|
||||
|
|
@ -158,11 +169,30 @@ async def main() -> None:
|
|||
# Check if monitoring is enabled before fetching listings
|
||||
if not state_manager.is_monitoring_enabled():
|
||||
logger.debug("Monitoring is paused, skipping listing check")
|
||||
await asyncio.sleep(CHECK_INTERVAL)
|
||||
sleep_interval = get_randomized_interval()
|
||||
await asyncio.sleep(sleep_interval)
|
||||
_flush_rotating_file_handlers()
|
||||
continue
|
||||
|
||||
current_listings = await app_handler.fetch_listings()
|
||||
|
||||
# Session validation: detect when listings fetch returns empty repeatedly
|
||||
if not current_listings or len(current_listings) == 0:
|
||||
zero_listings_count += 1
|
||||
logger.warning(f"Zero listings fetched ({zero_listings_count}/{MAX_ZERO_LISTINGS})")
|
||||
|
||||
if zero_listings_count >= MAX_ZERO_LISTINGS:
|
||||
logger.warning("Session likely expired - forcing re-login")
|
||||
state_manager.set_logged_in(False)
|
||||
zero_listings_count = 0
|
||||
await asyncio.sleep(5)
|
||||
continue
|
||||
else:
|
||||
# Reset counter on successful fetch
|
||||
if zero_listings_count > 0:
|
||||
logger.info(f"Session recovered - listings fetched successfully")
|
||||
zero_listings_count = 0
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"💥 Browser crash: {e}")
|
||||
logger.info("Recovering...")
|
||||
|
|
@ -186,7 +216,9 @@ async def main() -> None:
|
|||
|
||||
if not current_listings:
|
||||
logger.warning("No listings fetched")
|
||||
await asyncio.sleep(CHECK_INTERVAL)
|
||||
sleep_interval = get_randomized_interval()
|
||||
logger.debug(f"Sleeping for {sleep_interval}s (randomized)")
|
||||
await asyncio.sleep(sleep_interval)
|
||||
_flush_rotating_file_handlers()
|
||||
continue
|
||||
previous_listings = app_handler.load_previous_listings()
|
||||
|
|
@ -208,7 +240,9 @@ async def main() -> None:
|
|||
}
|
||||
app_handler.save_application(result)
|
||||
app_handler.save_listings(current_listings)
|
||||
await asyncio.sleep(CHECK_INTERVAL)
|
||||
sleep_interval = get_randomized_interval()
|
||||
logger.debug(f"Sleeping for {sleep_interval}s (randomized)")
|
||||
await asyncio.sleep(sleep_interval)
|
||||
_flush_rotating_file_handlers()
|
||||
continue
|
||||
new_listings = app_handler.find_new_listings(current_listings, previous_listings)
|
||||
|
|
@ -221,7 +255,9 @@ async def main() -> None:
|
|||
application_results = await app_handler.apply_to_listings(new_listings)
|
||||
app_handler.notify_new_listings(new_listings, application_results)
|
||||
app_handler.save_listings(current_listings)
|
||||
await asyncio.sleep(CHECK_INTERVAL)
|
||||
sleep_interval = get_randomized_interval()
|
||||
logger.debug(f"Next check in {sleep_interval}s")
|
||||
await asyncio.sleep(sleep_interval)
|
||||
_flush_rotating_file_handlers()
|
||||
except (KeyboardInterrupt, SystemExit):
|
||||
logger.info("Shutting down...")
|
||||
|
|
|
|||
|
|
@ -198,9 +198,16 @@ class TelegramBot:
|
|||
self.last_update_id = update["update_id"]
|
||||
self._handle_update(update)
|
||||
except requests.exceptions.Timeout:
|
||||
logger.debug("Telegram polling timeout (normal - will retry)")
|
||||
continue
|
||||
except requests.exceptions.ConnectionError as e:
|
||||
logger.warning(f"Telegram connection error (will retry): {str(e)[:100]}")
|
||||
time.sleep(5)
|
||||
except requests.exceptions.RequestException as e:
|
||||
logger.error(f"Telegram request error: {type(e).__name__}: {str(e)[:100]}")
|
||||
time.sleep(5)
|
||||
except Exception as e:
|
||||
logger.error(f"Telegram polling error: {e}")
|
||||
logger.error(f"Telegram polling error: {type(e).__name__}: {str(e)[:100]}")
|
||||
time.sleep(5)
|
||||
|
||||
def _handle_update(self, update: dict) -> None:
|
||||
|
|
|
|||
66
tests/test_randomized_timing.py
Normal file
66
tests/test_randomized_timing.py
Normal file
|
|
@ -0,0 +1,66 @@
|
|||
"""Tests for randomized timing functionality"""
|
||||
import pytest
|
||||
import random
|
||||
|
||||
|
||||
def get_randomized_interval(base_interval: int = 150, variance: int = 30) -> int:
|
||||
"""Get check interval with random variance to avoid bot detection patterns."""
|
||||
variance_value = random.randint(-variance, variance)
|
||||
interval = base_interval + variance_value
|
||||
# Ensure minimum of 60 seconds to avoid excessive load
|
||||
return max(60, interval)
|
||||
|
||||
|
||||
class TestRandomizedTiming:
|
||||
"""Test randomized check interval functionality"""
|
||||
|
||||
def test_randomized_interval_within_bounds(self):
|
||||
"""Test that randomized interval stays within expected bounds"""
|
||||
# Test multiple intervals to ensure randomization
|
||||
intervals = [get_randomized_interval(150, 30) for _ in range(100)]
|
||||
|
||||
# All intervals should be between 120 and 180 (150 ± 30)
|
||||
assert all(120 <= i <= 180 for i in intervals), f"Some intervals outside bounds"
|
||||
|
||||
# Should have some variance (not all the same)
|
||||
assert len(set(intervals)) > 1, "No randomization detected - all intervals are identical"
|
||||
|
||||
def test_minimum_interval_enforced(self):
|
||||
"""Test that minimum 60s interval is enforced even with high variance"""
|
||||
# Test multiple times with extreme variance
|
||||
intervals = [get_randomized_interval(90, 50) for _ in range(100)]
|
||||
|
||||
# All intervals should be >= 60
|
||||
assert all(i >= 60 for i in intervals), f"Some intervals below 60s minimum: {min(intervals)}"
|
||||
|
||||
def test_zero_variance_possible(self):
|
||||
"""Test that variance can be disabled by setting to 0"""
|
||||
# All intervals should be exactly 150
|
||||
intervals = [get_randomized_interval(150, 0) for _ in range(20)]
|
||||
assert all(i == 150 for i in intervals), "Variance=0 should produce constant interval"
|
||||
|
||||
def test_interval_distribution(self):
|
||||
"""Test that randomization produces reasonable distribution"""
|
||||
# Generate many samples
|
||||
intervals = [get_randomized_interval(150, 30) for _ in range(1000)]
|
||||
|
||||
avg = sum(intervals) / len(intervals)
|
||||
|
||||
# Average should be close to 150 (within 5 seconds)
|
||||
assert 145 <= avg <= 155, f"Average {avg} too far from expected 150"
|
||||
|
||||
# Should have good spread (at least 20 different values)
|
||||
assert len(set(intervals)) >= 20, "Not enough variance in intervals"
|
||||
|
||||
def test_custom_parameters(self):
|
||||
"""Test with various custom base intervals and variances"""
|
||||
test_cases = [
|
||||
(120, 20, 100, 140), # base, variance, min_expected, max_expected
|
||||
(180, 40, 140, 220),
|
||||
(200, 10, 190, 210),
|
||||
]
|
||||
|
||||
for base, variance, min_exp, max_exp in test_cases:
|
||||
intervals = [get_randomized_interval(base, variance) for _ in range(50)]
|
||||
assert all(min_exp <= i <= max_exp for i in intervals), \
|
||||
f"Failed for base={base}, variance={variance}"
|
||||
82
tests/test_session_validation.py
Normal file
82
tests/test_session_validation.py
Normal file
|
|
@ -0,0 +1,82 @@
|
|||
"""Tests for session validation and re-login logic"""
|
||||
import pytest
|
||||
from unittest.mock import AsyncMock, MagicMock, patch
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import os
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
from state_manager import StateManager
|
||||
|
||||
|
||||
class TestSessionValidation:
|
||||
"""Test session validation and automatic re-login"""
|
||||
|
||||
@pytest.fixture
|
||||
def state_manager(self, tmp_path):
|
||||
"""Create a StateManager with a temporary state file"""
|
||||
state_file = tmp_path / "state.json"
|
||||
return StateManager(state_file)
|
||||
|
||||
def test_logged_in_status_toggle(self, state_manager):
|
||||
"""Test that logged_in status can be toggled"""
|
||||
assert state_manager.is_logged_in() is False
|
||||
|
||||
state_manager.set_logged_in(True)
|
||||
assert state_manager.is_logged_in() is True
|
||||
|
||||
state_manager.set_logged_in(False)
|
||||
assert state_manager.is_logged_in() is False
|
||||
|
||||
def test_logged_in_persistence_is_memory_only(self, state_manager, tmp_path):
|
||||
"""Test that logged_in status is memory-only (not persisted to disk)"""
|
||||
state_manager.set_logged_in(True)
|
||||
assert state_manager.is_logged_in() is True
|
||||
|
||||
# Create new state manager with same file - should start as False
|
||||
new_state_manager = StateManager(tmp_path / "state.json")
|
||||
assert new_state_manager.is_logged_in() is False
|
||||
|
||||
def test_zero_listings_counter_increments(self):
|
||||
"""Test that zero_listings_count increments when no listings are fetched"""
|
||||
# This would be tested in integration tests with the main loop
|
||||
# For now, we test the logic conceptually
|
||||
zero_listings_count = 0
|
||||
max_zero = 3
|
||||
|
||||
# Simulate 3 consecutive zero fetches
|
||||
for _ in range(3):
|
||||
current_listings = [] # Empty result
|
||||
if not current_listings:
|
||||
zero_listings_count += 1
|
||||
|
||||
assert zero_listings_count == max_zero
|
||||
|
||||
def test_zero_listings_counter_resets_on_success(self):
|
||||
"""Test that counter resets when listings are successfully fetched"""
|
||||
zero_listings_count = 2
|
||||
|
||||
# Simulate successful fetch
|
||||
current_listings = [{"id": "test123", "rooms": "2"}]
|
||||
if current_listings and len(current_listings) > 0:
|
||||
zero_listings_count = 0
|
||||
|
||||
assert zero_listings_count == 0
|
||||
|
||||
def test_session_validation_logic(self, state_manager):
|
||||
"""Test the session validation workflow"""
|
||||
# Start logged in
|
||||
state_manager.set_logged_in(True)
|
||||
assert state_manager.is_logged_in() is True
|
||||
|
||||
# Simulate session expiration detection (zero listings threshold reached)
|
||||
zero_listings_count = 3
|
||||
max_zero = 3
|
||||
|
||||
if zero_listings_count >= max_zero:
|
||||
state_manager.set_logged_in(False)
|
||||
|
||||
# Verify session was reset
|
||||
assert state_manager.is_logged_in() is False
|
||||
Loading…
Add table
Add a link
Reference in a new issue