def autoclean_debug_material(data_dir="data", max_age_hours=48):
    """Delete stale debug artifacts under ``data_dir`` and report what was removed.

    Recursively walks ``data_dir`` and unlinks every ``.png`` / ``.html`` file
    (extension compared case-insensitively) whose modification time lies more
    than ``max_age_hours`` hours in the past. Listings, applications, state,
    and any CSV/JSON/LOG/TTF files are never deleted.

    Args:
        data_dir: Root directory to scan. A directory that does not exist
            produces no deletions, because ``os.walk`` ignores listing errors
            by default.
        max_age_hours: Minimum file age, in hours, before it is removed.

    Returns:
        A list with the string path of every file that was deleted.
    """
    logger = logging.getLogger()
    now = time.time()
    max_age = max_age_hours * 3600

    # File extensions considered debug material.
    debug_exts = {".png", ".html"}
    # Extensions and file names that must survive even if the debug list grows.
    safe_exts = {".json", ".csv", ".log", ".ttf"}
    safe_names = {
        "listings.json",
        "applications.json",
        "state.json",
        "wgcompany_listings.json",
        "wgcompany_times.csv",
        "listing_times.csv",
    }
    # Resolve the effective allow-list once instead of re-checking both sets
    # for every file inside the loop.
    deletable_exts = debug_exts - safe_exts

    deleted = []
    for root, _dirs, files in os.walk(Path(data_dir)):
        for fname in files:
            if fname in safe_names:
                continue
            fpath = Path(root) / fname
            if fpath.suffix.lower() not in deletable_exts:
                continue
            try:
                if now - fpath.stat().st_mtime > max_age:
                    fpath.unlink()
                    deleted.append(str(fpath))
            except OSError as e:
                # Best effort: a file that vanished or is not removable must
                # not abort the sweep of the remaining files.
                logger.warning("Could not delete %s: %s", fpath, e)

    logger.info(
        "Autocleaned %d debug files older than %sh.", len(deleted), max_age_hours
    )
    return deleted