wohnbot/helper_functions/merge_applications.py

54 lines
1.9 KiB
Python
Raw Normal View History

2025-12-31 17:01:50 +01:00
rsync -av --progress -e "ssh -i ~/.ssh/id_rsa" data/ pi@omv.local:/srv/dev-disk-by-uuid-a920d9c0-dfc1-4a58-ae4d-92cf88ff04a5/docker-app/wohnbot/data/
import json
2025-12-31 16:47:03 +01:00
from pathlib import Path
from datetime import datetime
def parse_timestamp(entry):
    """Return the entry's ``'timestamp'`` field parsed as a ``datetime``.

    Args:
        entry: Mapping (anything with ``.get``) that may carry an ISO-8601
            string under the key ``'timestamp'``.

    Returns:
        The parsed ``datetime``, or ``None`` when the field is missing,
        empty, not a string, or not valid ISO-8601. The result is
        timezone-aware only if the string carries an offset.
    """
    ts = entry.get('timestamp')
    if not isinstance(ts, str) or not ts:
        return None
    # ``datetime.fromisoformat`` only accepts a trailing "Z" (UTC) from
    # Python 3.11 on; rewrite it as an explicit offset so older
    # interpreters parse it the same way.
    if ts.endswith('Z'):
        ts = ts[:-1] + '+00:00'
    try:
        return datetime.fromisoformat(ts)
    except ValueError:
        # Malformed timestamp: treated as "no timestamp" by callers.
        return None
def _prefer_entry(l_entry, r_entry):
    """Pick which of two entries stored under the same key survives the merge."""
    if not l_entry or not r_entry:
        # One side is empty/null, so there is nothing to compare.
        return l_entry or r_entry
    # Prefer the entry with more fields.
    if len(l_entry) != len(r_entry):
        return l_entry if len(l_entry) > len(r_entry) else r_entry
    # Same number of fields: prefer the strictly later timestamp
    # (on equal timestamps the remote entry is kept).
    l_ts = parse_timestamp(l_entry)
    r_ts = parse_timestamp(r_entry)
    if l_ts is not None and r_ts is not None:
        try:
            return l_entry if l_ts > r_ts else r_entry
        except TypeError:
            # A naive and a timezone-aware datetime cannot be ordered;
            # fall through to the local entry instead of aborting the merge.
            pass
    return l_entry  # fallback


def merge_applications(local_path, merge_path, output_path=None):
    """
    Merge two applications.json files, deduplicating by top-level key.

    Both files must contain a JSON object; per the original docstring the
    keys are presumably listing ids (not verifiable from this file).
    When a key exists in both files, the entry with more fields wins; with
    equal field counts the later ``timestamp`` wins, and if the timestamps
    are missing or cannot be compared the local entry is kept.

    Keys are written in a stable order (local file order first, then keys
    that only exist in the merge file) so repeated runs give identical output.

    Args:
        local_path: Path of the primary applications.json.
        merge_path: Path of the applications.json to merge in.
        output_path: Where to write the result. Defaults to ``local_path``,
            i.e. the local file is overwritten in place.
    """
    output_path = output_path or local_path
    with open(local_path, encoding='utf-8') as f:
        local = json.load(f)
    with open(merge_path, encoding='utf-8') as f:
        remote = json.load(f)

    merged = {}
    for key, l_entry in local.items():
        # Decide by key membership, not truthiness, so an empty local entry
        # is kept as-is instead of being replaced by None.
        if key in remote:
            merged[key] = _prefer_entry(l_entry, remote[key])
        else:
            merged[key] = l_entry
    for key, r_entry in remote.items():
        if key not in local:
            merged[key] = r_entry

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(merged, f, ensure_ascii=False, indent=2)
    print(f"Merged {len(merged)} unique applications to {output_path}")
if __name__ == "__main__":
    # Script entry point: fold the staged file into the main applications
    # file. No output path is given, so the main file is rewritten in place.
    merge_applications(
        local_path="data/applications.json",
        merge_path="data/to_merge/applications.json",
    )