after merge
This commit is contained in:
parent
55a6ddb819
commit
deb074f074
4 changed files with 171 additions and 0 deletions
34
helper_functions/merge_listing_times.py
Normal file
34
helper_functions/merge_listing_times.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
import csv
|
||||
from pathlib import Path
|
||||
|
||||
def merge_listing_times(local_path, merge_path, output_path=None):
    """Merge two listing_times CSV files, dropping duplicate rows.

    Rows are read from ``local_path`` first and ``merge_path`` second. A row
    is a duplicate when its ``(listing_id, timestamp)`` pair has already been
    seen; the first occurrence wins and input order is otherwise preserved.

    Args:
        local_path: Main CSV file (e.g. ``data/listing_times.csv``).
        merge_path: CSV file to fold in
            (e.g. ``data/to_merge/listing_times.csv``).
        output_path: Where to write the merged file. Defaults to
            overwriting ``local_path``.

    Raises:
        KeyError: If a data row lacks a ``listing_id`` or ``timestamp``
            column.
    """
    output_path = output_path or local_path
    seen = set()
    rows = []
    # Ordered union of both headers. Taking the columns from the headers
    # (instead of from the first data row) keeps header-only inputs working
    # and lets a column that exists in only one file survive the merge.
    fieldnames = []

    # Read both files completely before touching the output: output_path
    # usually *is* local_path, so nothing may fail after it is truncated.
    for path in (local_path, merge_path):
        with open(path, newline='', encoding='utf-8') as f:
            reader = csv.DictReader(f)
            # reader.fieldnames is None for a completely empty file.
            for name in reader.fieldnames or []:
                if name not in fieldnames:
                    fieldnames.append(name)
            for row in reader:
                key = (row['listing_id'], row['timestamp'])
                if key not in seen:
                    seen.add(key)
                    rows.append(row)

    # Write merged file
    with open(output_path, 'w', newline='', encoding='utf-8') as f:
        # No header in either input means there are no rows either; leave
        # the output empty rather than emitting a blank header line.
        if fieldnames:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(rows)
    print(f"Merged {len(rows)} unique rows to {output_path}")
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: fold the staged CSV into the main data file.
    # No output path is given, so the main file is overwritten in place.
    main_csv = "data/listing_times.csv"
    staged_csv = "data/to_merge/listing_times.csv"
    merge_listing_times(main_csv, staged_csv)
|
||||
Loading…
Add table
Add a link
Reference in a new issue