import csv
from pathlib import Path


def merge_wgcompany_times(local_path, merge_path, output_path=None):
    """
    Merge two wgcompany_times.csv files, deduplicating by listing_id and timestamp.

    Rows are read from ``local_path`` first, then ``merge_path``; for each
    ``(listing_id, timestamp)`` pair the first row encountered is kept and
    later duplicates are dropped. Both inputs are fully read before the
    output is opened, so overwriting ``local_path`` in place is safe.

    Args:
        local_path: main data/wgcompany_times.csv
        merge_path: data/to_merge/wgcompany_times.csv
        output_path: where to write the merged file
            (default: overwrite ``local_path``)

    Raises:
        FileNotFoundError: if either input file does not exist.
        KeyError: if a data row lacks the ``listing_id`` or ``timestamp`` column.
    """
    output_path = output_path or local_path
    seen = set()
    rows = []
    # Ordered union of the header columns of both files. Taking the columns
    # from the headers (rather than from the first data row) keeps the header
    # when there are no data rows and lets the two files differ in columns.
    fieldnames = []

    # Read both files
    for path in (local_path, merge_path):
        with open(path, newline='', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            # ``reader.fieldnames`` is None for a completely empty file.
            for name in reader.fieldnames or []:
                if name not in fieldnames:
                    fieldnames.append(name)
            for row in reader:
                key = (row['listing_id'], row['timestamp'])
                if key not in seen:
                    seen.add(key)
                    rows.append(row)

    # Write merged file
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        # Both inputs were empty files: there is no header to write.
        if fieldnames:
            writer.writeheader()
        # Columns missing from a row are written as empty strings.
        writer.writerows(rows)
    print(f"Merged {len(rows)} unique rows to {output_path}")


if __name__ == "__main__":
    merge_wgcompany_times(
        "data/wgcompany_times.csv",
        "data/to_merge/wgcompany_times.csv"
    )