50 lines
1.6 KiB
Python
50 lines
1.6 KiB
Python
import json
|
|
from pathlib import Path
|
|
from datetime import datetime
|
|
|
|
def parse_timestamp(entry, timestamp_field='fetched_at'):
    """Return the entry's timestamp parsed as a ``datetime``, or ``None``.

    Args:
        entry: Mapping-like object exposing ``.get``; the timestamp is read
            from ``entry.get(timestamp_field)``.
        timestamp_field: Key holding the ISO-8601 timestamp string.
            Defaults to ``'fetched_at'``.

    Returns:
        The value parsed by ``datetime.fromisoformat``, or ``None`` when the
        field is missing, falsy (e.g. ``''``), or cannot be parsed.
    """
    raw = entry.get(timestamp_field)
    if not raw:
        return None
    try:
        return datetime.fromisoformat(raw)
    except (TypeError, ValueError):
        # TypeError: the stored value is not a string.
        # ValueError: the string is not in a format fromisoformat accepts.
        return None
|
|
|
|
def _parse_iso_timestamp(value):
    """Return ``value`` parsed as a datetime, or None if it is not an ISO string."""
    if not isinstance(value, str):
        return None
    try:
        return datetime.fromisoformat(value)
    except ValueError:
        return None


def _local_is_newer(l_ts, r_ts):
    """Return True when the local timestamp is strictly later than the remote one."""
    l_dt = _parse_iso_timestamp(l_ts)
    r_dt = _parse_iso_timestamp(r_ts)
    if l_dt is not None and r_dt is not None:
        try:
            # Compare real instants so differing UTC offsets are ordered
            # correctly (plain string comparison gets these wrong).
            return l_dt > r_dt
        except TypeError:
            # Naive and timezone-aware datetimes cannot be ordered; fall
            # through to comparing the raw values.
            pass
    try:
        return l_ts > r_ts
    except TypeError:
        # Values of unrelated types (e.g. str vs. int) cannot be ordered;
        # keep the local entry, the same choice made for a missing timestamp.
        return True


def _pick_entry(l_entry, r_entry, timestamp_field):
    """Choose between two entries that share a key."""
    if not (l_entry and r_entry):
        # At least one side is empty/null: prefer whichever carries data.
        return l_entry or r_entry
    l_ts = l_entry.get(timestamp_field) if isinstance(l_entry, dict) else None
    r_ts = r_entry.get(timestamp_field) if isinstance(r_entry, dict) else None
    if l_ts and r_ts:
        # On a tie the remote entry wins.
        return l_entry if _local_is_newer(l_ts, r_ts) else r_entry
    return l_entry  # fallback: recency cannot be decided, keep local


def merge_dict_json(local_path, merge_path, output_path=None, timestamp_field='fetched_at'):
    """Merge two dict-based JSON files (keyed by id), deduplicating by key.

    When a key exists in both files and both entries carry a truthy
    ``timestamp_field``, the entry with the later timestamp is kept (the
    remote entry wins a tie). Timestamps are compared as datetimes when both
    parse with ``datetime.fromisoformat``; otherwise the raw values are
    compared. If recency cannot be decided, the local entry is kept. A key
    present in only one file is copied unchanged.

    Keys are written in a stable order: local keys first, in file order,
    followed by keys that only exist in the merge file.

    Args:
        local_path: Path of the primary JSON file.
        merge_path: Path of the JSON file whose entries are merged in.
        output_path: Destination path. Defaults to ``local_path``, i.e. the
            primary file is overwritten in place.
        timestamp_field: Entry key holding the timestamp used to decide
            which duplicate is newer.

    Raises:
        TypeError: If either file's top-level JSON value is not an object.
        OSError: If a file cannot be opened.
        json.JSONDecodeError: If a file does not contain valid JSON.
    """
    output_path = output_path or local_path
    with open(local_path, encoding='utf-8') as f:
        local = json.load(f)
    with open(merge_path, encoding='utf-8') as f:
        remote = json.load(f)

    for path, data in ((local_path, local), (merge_path, remote)):
        if not isinstance(data, dict):
            raise TypeError(
                f"{path}: expected a JSON object at top level, got {type(data).__name__}"
            )

    merged = {}
    # dict.fromkeys de-duplicates while preserving first-seen order, so the
    # output file is stable between runs (iterating a set is not).
    for key in dict.fromkeys([*local, *remote]):
        if key in local and key in remote:
            merged[key] = _pick_entry(local[key], remote[key], timestamp_field)
        elif key in local:
            merged[key] = local[key]
        else:
            merged[key] = remote[key]

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(merged, f, ensure_ascii=False, indent=2)
    print(f"Merged {len(merged)} unique entries to {output_path}")
|
|
|
|
if __name__ == "__main__":
    # Each pair is (file updated in place, file holding the entries to merge in).
    merge_jobs = (
        ("data/listings.json", "data/to_merge/listings.json"),
        ("data/wgcompany_listings.json", "data/to_merge/wgcompany_listings.json"),
    )
    for target_file, incoming_file in merge_jobs:
        merge_dict_json(target_file, incoming_file)
|