wohnbot/helper_functions/merge_listing_times.py

35 lines
1.2 KiB
Python
Raw Normal View History

2025-12-31 16:47:03 +01:00
import csv
from pathlib import Path
def merge_listing_times(local_path, merge_path, output_path=None) -> None:
    """Merge two listing_times CSV files into one, dropping duplicate rows.

    Rows are read from ``local_path`` first and ``merge_path`` second. A row
    is kept only the first time its ``(listing_id, timestamp)`` pair is seen,
    so on a clash the row from ``local_path`` wins. Rows keep the order in
    which they were first read.

    The output header is the union of both input headers, in order of first
    appearance; a row that lacks one of those columns gets an empty value
    there. When neither input contains data rows, the header alone is
    written (or an empty file if neither input has a header line).

    Args:
        local_path: Main CSV file, e.g. ``data/listing_times.csv``.
        merge_path: CSV file to merge in, e.g.
            ``data/to_merge/listing_times.csv``.
        output_path: Where to write the merged file. Defaults to
            overwriting ``local_path``.

    Raises:
        FileNotFoundError: If either input file does not exist.
        KeyError: If a data row has no ``listing_id`` or ``timestamp``
            column.
    """
    output_path = output_path or local_path
    seen = set()
    rows = []
    fieldnames = []

    # Both inputs are read completely before the output is opened, so it is
    # safe for output_path to be one of the inputs.
    for path in (local_path, merge_path):
        with open(path, newline='', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            # Take the header from the reader rather than from the first data
            # row: it is available even when the file has no rows, and it
            # lets columns that exist in only one file survive the merge.
            # reader.fieldnames is None for a file with no header line.
            for name in reader.fieldnames or ():
                if name not in fieldnames:
                    fieldnames.append(name)
            for row in reader:
                key = (row['listing_id'], row['timestamp'])
                if key not in seen:
                    seen.add(key)
                    rows.append(row)

    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        # With no header at all there is nothing meaningful to write; the
        # output is simply left empty instead of crashing.
        if fieldnames:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)
    print(f"Merged {len(rows)} unique rows to {output_path}")
if __name__ == "__main__":
    # Script entry point: fold the staged CSV into the main data file.
    # No output path is given, so the main file is overwritten in place.
    main_csv = "data/listing_times.csv"
    staged_csv = "data/to_merge/listing_times.csv"
    merge_listing_times(main_csv, staged_csv)