35 lines
1.2 KiB
Python
35 lines
1.2 KiB
Python
|
|
import csv
|
||
|
|
from pathlib import Path
|
||
|
|
|
||
|
|
def merge_listing_times(local_path, merge_path, output_path=None):
    """
    Merge two listing_times.csv files, deduplicating by listing_id and timestamp.

    Both files are read completely before anything is written, so the output
    may safely be one of the inputs. Rows are kept in read order (local file
    first, then merge file); when the same (listing_id, timestamp) pair appears
    more than once, the first occurrence wins.

    Args:
        local_path: main data/listing_times.csv
        merge_path: data/to_merge/listing_times.csv
        output_path: where to write merged file (default: overwrite local_path)

    Raises:
        OSError: if an input cannot be opened or the output cannot be written.
        KeyError: if a data row has no 'listing_id' or 'timestamp' column.
    """
    output_path = output_path or local_path
    seen = set()
    rows = []
    # Union of the header columns of both files, in first-seen order. Taking
    # the header from the readers (rather than from rows[0]) keeps the merge
    # working when there are no data rows at all, and when one file carries a
    # column the other lacks.
    fieldnames = []

    # Read both files
    for path in (local_path, merge_path):
        with open(path, newline='', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            # reader.fieldnames is None for a completely empty file.
            for name in reader.fieldnames or ():
                if name not in fieldnames:
                    fieldnames.append(name)
            for row in reader:
                key = (row['listing_id'], row['timestamp'])
                if key not in seen:
                    seen.add(key)
                    rows.append(row)

    # Write merged file
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        # restval='' fills columns that only exist in the other input file.
        writer = csv.DictWriter(f, fieldnames=fieldnames, restval='')
        if fieldnames:
            # Skip the header only when neither input had one (both empty).
            writer.writeheader()
        writer.writerows(rows)
    print(f"Merged {len(rows)} unique rows to {output_path}")
|
||
|
|
|
||
|
|
if __name__ == "__main__":
    # Script entry point: fold the to_merge copy into the main listing_times
    # file. No output_path is given, so the main file is overwritten in place.
    merge_listing_times(
        local_path="data/listing_times.csv",
        merge_path="data/to_merge/listing_times.csv",
    )
|