more translation

parent 2ce9ca50b5
commit a41be821c1

997 changed files with 33247 additions and 32490 deletions
scripts/analyze_tags.sh (Executable file, +25)

@@ -0,0 +1,25 @@
#!/bin/bash

# Script to analyze and consolidate tags across the website

echo "Analyzing all tags in the website..."
echo "===================================="
echo ""

# Extract all tags and count their usage
find content/project -name "*.md" -exec grep -A 20 "tags = \[" {} \; | \
    grep -E '^\s*"' | \
    sed 's/^[[:space:]]*//' | \
    sed 's/"//g' | \
    sed 's/,$//' | \
    sort | uniq -c | sort -rn

echo ""
echo "===================================="
echo "Total unique tags:"
find content/project -name "*.md" -exec grep -A 20 "tags = \[" {} \; | \
    grep -E '^\s*"' | \
    sed 's/^[[:space:]]*//' | \
    sed 's/"//g' | \
    sed 's/,$//' | \
    sort -u | wc -l
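
Both this script and the Python script below assume Zola-style TOML front matter in which every tag is quoted on its own line, roughly this shape (file name and tag values invented for illustration):

+++
title = "Example project"

[taxonomies]
tags = [
    "3d printing",
    "recycling",
]
+++

Note that grep -A 20 only scans the 20 lines after each `tags = [` match, so a tag list longer than that would be silently truncated in the counts.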

scripts/consolidate_all_tags.py (Normal file, +400)

@@ -0,0 +1,400 @@
#!/usr/bin/env python3
"""
Complete Tag Consolidation Script for Zola Website

This script performs comprehensive tag standardization:
1. Analyzes current tag usage
2. Applies consolidation mappings (capitalization, language, concepts)
3. Removes duplicate tags within files
4. Generates a report

Usage:
    python3 consolidate_all_tags.py [--dry-run] [--no-backup]

Options:
    --dry-run    Show what would change without making changes
    --no-backup  Skip backup creation (a backup is created by default)
"""

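# Example invocations, run from the project root (mirroring the usage above):
#   python3 scripts/consolidate_all_tags.py --dry-run    # preview changes only
#   python3 scripts/consolidate_all_tags.py              # apply, backup first
#   python3 scripts/consolidate_all_tags.py --no-backup  # apply without backup
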
import re
import sys
import shutil
import argparse
from pathlib import Path
from datetime import datetime
from collections import Counter

# ============================================================================
# TAG MAPPING CONFIGURATION
# ============================================================================

TAG_MAP = {
    # Capitalization fixes - everything lowercase
    "AI": "ai",
    "Unity": "unity",
    "Workshop": "workshop",
    "Stable Diffusion": "stable diffusion",
    "University of the Arts Berlin": "university of the arts berlin",
    "Arduino": "arduino",
    "Linux": "linux",
    "VLF": "vlf",
    "SDR": "sdr",
    "MTCNN": "mtcnn",
    "ISD": "isd",
    "GOFAI": "gofai",
    "CNN": "cnn",
    "LoRa": "lora",
    "Materialübung": "materialübung",
    "C#": "c#",

    # 3D printing consolidation
    "3D-Printing": "3d printing",
    "3D printing": "3d printing",
    "additive manufacturing": "3d printing",

    # Graphics
    "3D graphics": "3d graphics",

    # Language fixes (English only - no German)
    "programmierung": "programming",
    "mobile werkstatt": "mobile workshop",
    "urbane intervention": "urban intervention",
    "bildung": "education",
    "antenne": "antenna",
    "elektronik": "electronics",
    "blitz": "lightning",

    # Automation
    "automatic": "automation",
    "automatic1111": "stable diffusion",

    # Sustainability
    "cradle-to-cradle": "sustainability",
    "circular": "sustainability",

    # Data
    "data collection": "data",
    "data viz": "data visualization",

    # Energy
    "electricity": "energy",
    "solar": "energy",
    "grid": "energy",

    # Collaboration
    "collaborative": "collaboration",
    "collaborative recycling": "recycling",

    # Communication
    "blogging": "communication",

    # Waste/recycling
    "waste": "recycling",
    "precious plastic": "recycling",
    "shredder": "recycling",
    "plastics-as-waste": "recycling",
    "plastics-as-material": "plastics",

    # University/research
    "university": "research",
    "master thesis": "thesis",

    # Making/fabrication
    "filastruder": "3d printing",
    "filament": "3d printing",
    "design for printing": "3d printing",

    # Simulation
    "simulation": "simulation",

    # Scaling/design
    "scaling": "design",

    # Games/interactive
    "game": "interactive",
    "1st person": "interactive",
    "2 player": "interactive",
    "3rd person": "interactive",
    "cyberpunk": "speculative design",

    # Infrastructure
    "hosting": "infrastructure",
    "decentral": "decentralized",

    # Geographic
    "iit kharagpur": "india",
    "himalaya": "india",

    # Programming
    "rust": "programming",
    "physics": "programming",
    "ml": "machine learning",

    # Work/private
    "privat": "work",

    # Person names -> topics
    "alison jaggar": "philosophy",
    "elizabeth anderson": "philosophy",
    "elsa dorlin": "philosophy",
    "francois ewald": "philosophy",
    "josé medina": "philosophy",
    "judith butler": "philosophy",
    "michael foucault": "philosophy",
    "miranda fricker": "philosophy",
    "geert lovink": "media theory",
    "evgeny morozov": "media theory",
    "lisa parks": "media theory",
    "francis hunger": "media theory",

    # Remove entirely
    "TODO, unfinished": None,
}

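# How the map is read (illustrative lookups, not executed here):
#   TAG_MAP["3D-Printing"]      -> "3d printing"   (tag renamed in place)
#   TAG_MAP["TODO, unfinished"] -> None            (tag dropped entirely)
#   "sdr" has no entry (already lowercase), so it passes through unchanged
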
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

def clean_tag(tag):
    """Remove trailing commas, spaces, and normalize"""
    return tag.strip().rstrip(',').strip()

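# For instance (hypothetical inputs):
#   clean_tag('Unity, ')  -> 'Unity'
#   clean_tag(' waste ,') -> 'waste'
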
def create_backup(content_dir):
    """Create timestamped backup of content directory"""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_dir = Path("backups") / f"tags_{timestamp}"
    backup_dir.mkdir(parents=True, exist_ok=True)

    # content_dir is already content/project (see main), so copy it as-is;
    # content_dir / "project" would point one directory level too deep
    shutil.copytree(content_dir, backup_dir / content_dir.name)
    print(f"✓ Backup created: {backup_dir}")
    return backup_dir


def analyze_tags(content_dir):
    """Analyze current tag usage across all files"""
    all_tags = []

    for md_file in content_dir.rglob("*.md"):
        with open(md_file, 'r', encoding='utf-8') as f:
            content = f.read()

        tags_pattern = r'tags = \[(.*?)\]'
        match = re.search(tags_pattern, content, re.DOTALL)
        if match:
            tag_pattern = r'"([^"]+)"'
            tags = re.findall(tag_pattern, match.group(1))
            all_tags.extend([clean_tag(t) for t in tags])

    return Counter(all_tags)

# ============================================================================
|
||||
# MAIN PROCESSING FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
def process_file(filepath, dry_run=False):
|
||||
"""
|
||||
Process a single markdown file:
|
||||
1. Apply tag mappings
|
||||
2. Remove duplicates
|
||||
3. Clean formatting
|
||||
"""
|
||||
with open(filepath, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
|
||||
# Find tags section
|
||||
tags_pattern = r'(tags = \[)(.*?)(\])'
|
||||
match = re.search(tags_pattern, content, re.DOTALL)
|
||||
|
||||
if not match:
|
||||
return None
|
||||
|
||||
before_section = match.group(1)
|
||||
tags_content = match.group(2)
|
||||
after_section = match.group(3)
|
||||
|
||||
# Extract tags
|
||||
tag_pattern = r'"([^"]+)"'
|
||||
original_tags = re.findall(tag_pattern, tags_content)
|
||||
|
||||
# Process tags
|
||||
processed_tags = []
|
||||
changes = []
|
||||
|
||||
for tag in original_tags:
|
||||
cleaned = clean_tag(tag)
|
||||
|
||||
# Apply mapping
|
||||
if cleaned in TAG_MAP:
|
||||
mapped = TAG_MAP[cleaned]
|
||||
if mapped is None:
|
||||
# Tag marked for removal
|
||||
changes.append(f" ✗ {cleaned} (removed)")
|
||||
continue
|
||||
elif mapped != cleaned:
|
||||
changes.append(f" → {cleaned} → {mapped}")
|
||||
processed_tags.append(mapped)
|
||||
else:
|
||||
processed_tags.append(mapped)
|
||||
elif cleaned != tag:
|
||||
# Just cleaned, no mapping
|
||||
changes.append(f" ✓ {tag} → {cleaned} (cleaned)")
|
||||
processed_tags.append(cleaned)
|
||||
else:
|
||||
processed_tags.append(cleaned)
|
||||
|
||||
    # Remove duplicates while preserving order
    seen = set()
    unique_tags = []
    duplicates = []

    for tag in processed_tags:
        tag_lower = tag.lower().strip()
        if tag_lower not in seen:
            seen.add(tag_lower)
            unique_tags.append(tag)
        else:
            duplicates.append(tag)

    if duplicates:
        changes.append(f"   ⚠ Removed duplicates: {', '.join(duplicates)}")
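    # E.g. processed_tags ["unity", "ai", "unity"] (the second "unity" coming
    # from a mapped "Unity") yields unique_tags ["unity", "ai"]; comparison is
    # case-insensitive and the original order is preserved.
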
    if not changes:
        return None  # No changes needed

    # Reconstruct tags section; guard the empty case so removing every tag
    # yields tags = [] rather than a list containing one empty string
    if unique_tags:
        new_tags_content = '\n    "' + '",\n    "'.join(unique_tags) + '",\n'
    else:
        new_tags_content = ''
    new_tags_section = before_section + new_tags_content + after_section
    new_content = content[:match.start()] + new_tags_section + content[match.end():]

    # Write changes
    if not dry_run:
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(new_content)

    return {
        'changes': changes,
        'before_count': len(original_tags),
        'after_count': len(unique_tags),
        'duplicates_removed': len(duplicates)
    }

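# End-to-end sketch of process_file on one front-matter block (values invented):
#   before: tags = ["Unity,", "programmierung", "unity"]
#   after:  tags = [
#               "unity",
#               "programming",
#           ]
# "Unity," is cleaned to "Unity" and mapped to "unity", "programmierung" is
# mapped to "programming", and the now-duplicate "unity" is dropped.
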
def consolidate_all_tags(content_dir, dry_run=False, create_backup_flag=True):
    """Main consolidation process"""

    print("=" * 70)
    print("TAG CONSOLIDATION SCRIPT")
    print("=" * 70)
    print()

    # Step 1: Analyze current state
    print("📊 Analyzing current tags...")
    tag_counts = analyze_tags(content_dir)
    print(f"   Total unique tags: {len(tag_counts)}")
    print(f"   Total tag instances: {sum(tag_counts.values())}")
    print()

    # Step 2: Create backup
    if create_backup_flag and not dry_run:
        print("💾 Creating backup...")
        backup_dir = create_backup(content_dir)
        print()

    # Step 3: Process files
    if dry_run:
        print("🔍 DRY RUN - No changes will be made")
        print()
    else:
        print("🔧 Processing files...")
        print()

    files_changed = 0
    total_changes = 0
    total_duplicates = 0

    for md_file in sorted(content_dir.rglob("*.md")):
        result = process_file(md_file, dry_run)
        if result:
            rel_path = md_file.relative_to(content_dir.parent)
            print(f"📝 {rel_path}")
            print(f"   Tags: {result['before_count']} → {result['after_count']}")
            for change in result['changes']:
                print(change)
            print()

            files_changed += 1
            total_changes += len(result['changes'])
            total_duplicates += result['duplicates_removed']

    # Step 4: Final analysis
    if not dry_run:
        print("=" * 70)
        print("📊 Final analysis...")
        final_tag_counts = analyze_tags(content_dir)
        print(f"   Total unique tags: {len(final_tag_counts)}")
        print(f"   Total tag instances: {sum(final_tag_counts.values())}")
        print()

    # Step 5: Summary
    print("=" * 70)
    print("SUMMARY")
    print("=" * 70)
    print(f"Files changed: {files_changed}")
    print(f"Total changes: {total_changes}")
    print(f"Duplicates removed: {total_duplicates}")

    if not dry_run:
        print(f"Tag reduction: {len(tag_counts)} → {len(final_tag_counts)} "
              f"({len(tag_counts) - len(final_tag_counts)} tags removed)")

    if dry_run:
        print()
        print("⚠️  This was a DRY RUN. No files were modified.")
        print("   Run without --dry-run to apply changes.")
    else:
        print()
        print("✅ Tag consolidation complete!")
        if create_backup_flag:
            print(f"   Backup saved: {backup_dir}")

    print("=" * 70)

# ============================================================================
# MAIN ENTRY POINT
# ============================================================================

def main():
    parser = argparse.ArgumentParser(
        description='Consolidate and standardize tags across all markdown files',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    parser.add_argument('--dry-run', action='store_true',
                        help='Show what would change without making changes')
    parser.add_argument('--no-backup', action='store_true',
                        help='Skip creating backup before changes')

    args = parser.parse_args()

    content_dir = Path("content/project")
    if not content_dir.exists():
        print(f"❌ Error: {content_dir} does not exist")
        print("   Make sure you run this from the project root directory")
        sys.exit(1)

    try:
        consolidate_all_tags(
            content_dir,
            dry_run=args.dry_run,
            create_backup_flag=not args.no_backup
        )
    except KeyboardInterrupt:
        print("\n\n❌ Interrupted by user")
        sys.exit(1)
    except Exception as e:
        print(f"\n\n❌ Error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
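
With the print statements above, a changed file is reported roughly like this during a run (path, counts, and tags invented for illustration):

📝 project/example-project.md
   Tags: 4 → 2
   → Unity → unity
   ✗ TODO, unfinished (removed)
   ⚠ Removed duplicates: unity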

scripts/tag_consolidation_map.txt (Normal file, +193)

@@ -0,0 +1,193 @@
# Tag Consolidation Map
# Format: old_tag -> new_tag
# This will be used to standardize tags across the website

# Remove trailing commas and spaces
*, -> *

# Capitalization fixes
AI -> ai
Unity -> unity
Unity, -> unity
Workshop -> workshop
Stable Diffusion -> stable diffusion
University of the Arts Berlin -> university of the arts berlin
Arduino -> arduino
Arduino, -> arduino
MTCNN -> mtcnn
ISD -> isd
GOFAI -> gofai
CNN -> cnn
LoRa -> lora
Linux -> linux
Linux, -> linux
Materialübung -> materialübung
SDR -> sdr
C# -> c#
C#, -> c#

# 3D printing consolidation
3D-Printing -> 3d printing
3D printing, -> 3d printing
additive manufacturing -> 3d printing

# Graphics/visuals
3D graphics -> 3d graphics
3D graphics, -> 3d graphics

# Remove trailing markers
1st person, -> 1st person
2 player, -> 2 player
3rd person, -> 3rd person

# Language fixes (keep English)
programmierung, -> programming
programmierung -> programming
mobile werkstatt -> mobile workshop
urbane intervention -> urban intervention
blitz -> lightning
antenne -> antenna
elektronik -> electronics
bildung -> education

# Concept consolidation
automatic -> automation
automatic, -> automation
automatic1111 -> stable diffusion

# Sustainability related
cradle-to-cradle -> sustainability
cradle-to-cradle, -> sustainability
environment, -> environment
sustainability, -> sustainability

# Data related
data collection -> data
data collection, -> data
data viz -> data visualization
data viz, -> data visualization

# Energy related
energy, -> energy
electricity -> energy
electricity, -> energy
solar -> energy
solar, -> energy
grid -> energy
grid, -> energy

# Collaboration
collaborative -> collaboration
collaborative, -> collaboration
collaborative recycling -> recycling

# Circular economy
circular -> sustainability
circular, -> sustainability

# Communication
communication, -> communication
blogging -> communication
blogging, -> communication

# Waste/recycling
waste -> recycling
recycling, -> recycling
precious plastic -> recycling
precious plastic, -> recycling
shredder -> recycling
shredder, -> recycling
plastics-as-waste -> recycling
plastics-as-material -> plastics
plastics, -> plastics

# Research/university
university of osnabrück, -> university of osnabrück
university of osnabrück -> university of osnabrück
university -> research
research, -> research
master thesis -> thesis
master thesis, -> thesis

# Making/fabrication
filastruder -> 3d printing
filastruder, -> 3d printing
filament -> 3d printing
filament, -> 3d printing
design for printing -> 3d printing

# Engineering
engineering, -> engineering

# Experiments
experiment, -> experiment

# Simulation
simulation -> simulation
simulation, -> simulation

# Scaling
scaling -> design
scaling, -> design

# Games
game -> interactive
game, -> interactive
1st person -> interactive
2 player -> interactive
3rd person -> interactive
cyberpunk -> speculative design
cyberpunk, -> speculative design

# Hosting/infrastructure
hosting -> infrastructure
hosting, -> infrastructure
decentral -> decentralized
decentral, -> decentralized

# Geographic
india -> india
india, -> india
iit kharagpur -> india
iit kharagpur, -> india
himalaya -> india
himalaya, -> india

# Programming languages (keep specific when meaningful)
rust -> programming
rust, -> programming
physics -> programming
physics, -> programming
ml -> machine learning
ml, -> machine learning

# Private/work (seems like metadata, maybe remove?)
privat -> work
private, -> work

# Remove overly specific person names - consolidate to topic
alison jaggar -> philosophy
elizabeth anderson -> philosophy
elsa dorlin -> philosophy
francois ewald -> philosophy
josé medina -> philosophy
judith butler -> philosophy
michael foucault -> philosophy
miranda fricker -> philosophy
geert lovink -> media theory
evgeny morozov -> media theory
lisa parks -> media theory
francis hunger -> media theory