with autoclean

This commit is contained in:
Aron Petau 2025-12-31 16:19:14 +01:00
parent 540a3cc884
commit 55a6ddb819
4 changed files with 54 additions and 1 deletions

View file

@ -21,6 +21,10 @@ COPY tests/ ./tests/
# Copy state manager
COPY state_manager.py .
# Copy autoclean_debug utility
COPY autoclean_debug.py .
# Copy main.py last so that it is the final file updated in the image
COPY main.py .

View file

@ -24,7 +24,7 @@ import os
# Locations of the persisted data files. All paths are relative, so they are
# resolved against the process's current working directory.
STATE_FILE = Path("data/state.json")
APPLICATIONS_FILE = Path("data/applications.json")
# NOTE(review): TIMING_FILE is assigned twice in this view; the second
# assignment wins, so the effective path is data/listing_times.csv. The first
# line is presumably the pre-change value shown by the diff — confirm that only
# one assignment exists in the real file.
TIMING_FILE = Path("data/timing.csv")
TIMING_FILE = Path("data/listing_times.csv")
LISTINGS_FILE = Path("data/listings.json")
# Directory that contains all of the files above.
DATA_DIR = Path("data")

34
autoclean_debug.py Normal file
View file

@ -0,0 +1,34 @@
import os
import time
from pathlib import Path
import logging
def autoclean_debug_material(data_dir="data", max_age_hours=48):
    """Delete stale debug artifacts below ``data_dir``.

    Walks ``data_dir`` recursively and removes every ``.png`` / ``.html``
    file whose modification time lies more than ``max_age_hours`` in the
    past. Files with a protected extension (``.json``, ``.csv``, ``.log``,
    ``.ttf``) or a protected name (listings, applications, state and timing
    files) are never removed.

    Args:
        data_dir: Directory to scan (string or path-like). A directory that
            does not exist simply yields no files.
        max_age_hours: Minimum age, in hours, a debug file must have before
            it is deleted.

    Returns:
        The paths (as strings) of the files that were actually deleted.
        Files that could not be inspected or removed are logged as warnings
        and left out of the result.
    """
    logger = logging.getLogger()
    now = time.time()
    max_age = max_age_hours * 3600

    # File extensions considered debug material.
    debug_exts = {".png", ".html"}
    # Extensions and file names that must never be touched, even if the
    # debug list above is extended carelessly in the future.
    safe_exts = {".json", ".csv", ".log", ".ttf"}
    safe_names = {
        "listings.json",
        "applications.json",
        "state.json",
        "wgcompany_listings.json",
        "wgcompany_times.csv",
        "listing_times.csv",
    }
    # Loop-invariant: compute the effective set of deletable extensions once.
    deletable_exts = debug_exts - safe_exts

    deleted = []
    for root, _dirs, files in os.walk(Path(data_dir)):
        for fname in files:
            if fname in safe_names:
                continue
            fpath = Path(root) / fname
            if fpath.suffix.lower() not in deletable_exts:
                continue
            try:
                if now - fpath.stat().st_mtime <= max_age:
                    continue  # still fresh enough to keep
                fpath.unlink()
            except OSError as e:
                # Best effort: a file that vanished or is not removable must
                # not abort the rest of the cleanup.
                logger.warning("Could not delete %s: %s", fpath, e)
            else:
                deleted.append(str(fpath))

    logger.info(
        "Autocleaned %s debug files older than %sh.", len(deleted), max_age_hours
    )
    return deleted

15
main.py
View file

@ -1,3 +1,4 @@
import asyncio
from playwright.async_api import async_playwright
from application_handler import ApplicationHandler
@ -9,6 +10,7 @@ import os
from dotenv import load_dotenv
from state_manager import StateManager
from pathlib import Path
from autoclean_debug import autoclean_debug_material
# --- Environment & Logging Setup ---
@ -70,7 +72,20 @@ async def main():
try:
logger.info(f"Bot is now running. Refreshing every {CHECK_INTERVAL} seconds...")
last_clean = 0
CLEAN_INTERVAL = 48 * 3600 # 48 hours in seconds
while True:
now = asyncio.get_event_loop().time()
# Autoclean debug material every 48 hours
if now - last_clean > CLEAN_INTERVAL:
logger.info("Running autoclean_debug_material (48h interval)...")
try:
deleted = autoclean_debug_material()
logger.info(f"Autocleaned {len(deleted)} debug files.")
except Exception as e:
logger.warning(f"Autoclean failed: {e}")
last_clean = now
current_listings = await app_handler.fetch_listings()
if not current_listings:
logger.warning("No listings fetched")