more translation

Aron Petau 2025-10-06 18:01:45 +02:00
parent 2ce9ca50b5
commit a41be821c1
997 changed files with 33247 additions and 32490 deletions

scripts/analyze_tags.sh Executable file

@@ -0,0 +1,25 @@
#!/bin/bash
# Script to analyze and consolidate tags across the website
echo "Analyzing all tags in the website..."
echo "===================================="
echo ""
# Extract all tags and count their usage.
# grep -A 20 prints the 20 lines following each "tags = [" opener, which
# assumes no front matter tags array spans more than 20 lines.
find content/project -name "*.md" -exec grep -A 20 "tags = \[" {} \; | \
grep -E '^\s*"' | \
sed 's/^[[:space:]]*//' | \
sed 's/"//g' | \
sed 's/,$//' | \
sort | uniq -c | sort -rn
echo ""
echo "===================================="
echo "Total unique tags:"
find content/project -name "*.md" -exec grep -A 20 "tags = \[" {} \; | \
grep -E '^\s*"' | \
sed 's/^[[:space:]]*//' | \
sed 's/"//g' | \
sed 's/,$//' | \
sort -u | wc -l
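
# Example usage (from the repository root):
#   ./scripts/analyze_tags.sh
# The first listing prints "<count> <tag>" pairs, most frequent first;
# the final number is the count of unique tags.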

consolidate_all_tags.py

@@ -0,0 +1,400 @@
#!/usr/bin/env python3
"""
Complete Tag Consolidation Script for Zola Website
This script performs comprehensive tag standardization:
1. Analyzes current tag usage
2. Applies consolidation mappings (capitalization, language, concepts)
3. Removes duplicate tags within files
4. Generates a report
Usage:
    python3 consolidate_all_tags.py [--dry-run] [--no-backup]

Options:
    --dry-run    Show what would change without making changes
    --no-backup  Skip backup creation (a backup is created by default)
"""
import re
import sys
import shutil
from pathlib import Path
from datetime import datetime
from collections import defaultdict, Counter
import argparse
# ============================================================================
# TAG MAPPING CONFIGURATION
# ============================================================================
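# Keys are tags as they appear after clean_tag() has stripped trailing
# commas and whitespace; values are the canonical replacements.
# A value of None removes the tag entirely.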
TAG_MAP = {
    # Capitalization fixes - everything lowercase
    "AI": "ai",
    "Unity": "unity",
    "Workshop": "workshop",
    "Stable Diffusion": "stable diffusion",
    "University of the Arts Berlin": "university of the arts berlin",
    "Arduino": "arduino",
    "Linux": "linux",
    "VLF": "vlf",
    "SDR": "sdr",
    "MTCNN": "mtcnn",
    "ISD": "isd",
    "GOFAI": "gofai",
    "CNN": "cnn",
    "LoRa": "lora",
    "Materialübung": "materialübung",
    "C#": "c#",
    # 3D printing consolidation
    "3D-Printing": "3d printing",
    "3D printing": "3d printing",
    "additive manufacturing": "3d printing",
    # Graphics
    "3D graphics": "3d graphics",
    # Language fixes (English only - no German)
    "programmierung": "programming",
    "mobile werkstatt": "mobile workshop",
    "urbane intervention": "urban intervention",
    "bildung": "education",
    "antenne": "antenna",
    "elektronik": "electronics",
    "blitz": "lightning",
    # Automation
    "automatic": "automation",
    "automatic1111": "stable diffusion",
    # Sustainability
    "cradle-to-cradle": "sustainability",
    "circular": "sustainability",
    # Data
    "data collection": "data",
    "data viz": "data visualization",
    # Energy
    "electricity": "energy",
    "solar": "energy",
    "grid": "energy",
    # Collaboration
    "collaborative": "collaboration",
    "collaborative recycling": "recycling",
    # Communication
    "blogging": "communication",
    # Waste/recycling
    "waste": "recycling",
    "precious plastic": "recycling",
    "shredder": "recycling",
    "plastics-as-waste": "recycling",
    "plastics-as-material": "plastics",
    # University/research
    "university": "research",
    "master thesis": "thesis",
    # Making/fabrication
    "filastruder": "3d printing",
    "filament": "3d printing",
    "design for printing": "3d printing",
    # Simulation
    "simulation": "simulation",
    # Scaling/design
    "scaling": "design",
    # Games/interactive
    "game": "interactive",
    "1st person": "interactive",
    "2 player": "interactive",
    "3rd person": "interactive",
    "cyberpunk": "speculative design",
    # Infrastructure
    "hosting": "infrastructure",
    "decentral": "decentralized",
    # Geographic
    "iit kharagpur": "india",
    "himalaya": "india",
    # Programming
    "rust": "programming",
    "physics": "programming",
    "ml": "machine learning",
    # Work/private
    "privat": "work",
    # Person names -> topics
    "alison jaggar": "philosophy",
    "elizabeth anderson": "philosophy",
    "elsa dorlin": "philosophy",
    "francois ewald": "philosophy",
    "josé medina": "philosophy",
    "judith butler": "philosophy",
    "michael foucault": "philosophy",
    "miranda fricker": "philosophy",
    "geert lovink": "media theory",
    "evgeny morozov": "media theory",
    "lisa parks": "media theory",
    "francis hunger": "media theory",
    # Remove entirely
    "TODO, unfinished": None,
}
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================
def clean_tag(tag):
    """Remove trailing commas and surrounding whitespace from a tag."""
    return tag.strip().rstrip(',').strip()


def create_backup(content_dir):
    """Create a timestamped backup of the content directory."""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_dir = Path("backups") / f"tags_{timestamp}"
    backup_dir.mkdir(parents=True, exist_ok=True)
    # content_dir already points at content/project, so copy it directly;
    # content_dir / "project" would resolve to the nonexistent
    # content/project/project.
    shutil.copytree(content_dir, backup_dir / "project")
    print(f"✓ Backup created: {backup_dir}")
    return backup_dir


def analyze_tags(content_dir):
    """Analyze current tag usage across all files."""
    all_tags = []
    for md_file in content_dir.rglob("*.md"):
        with open(md_file, 'r', encoding='utf-8') as f:
            content = f.read()
        # Match the front matter array: tags = [ "a", "b", ... ]
        tags_pattern = r'tags = \[(.*?)\]'
        match = re.search(tags_pattern, content, re.DOTALL)
        if match:
            tag_pattern = r'"([^"]+)"'
            tags = re.findall(tag_pattern, match.group(1))
            all_tags.extend([clean_tag(t) for t in tags])
    return Counter(all_tags)
# ============================================================================
# MAIN PROCESSING FUNCTIONS
# ============================================================================
def process_file(filepath, dry_run=False):
    """
    Process a single markdown file:
    1. Apply tag mappings
    2. Remove duplicates
    3. Clean formatting
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Find tags section
    tags_pattern = r'(tags = \[)(.*?)(\])'
    match = re.search(tags_pattern, content, re.DOTALL)
    if not match:
        return None

    before_section = match.group(1)
    tags_content = match.group(2)
    after_section = match.group(3)

    # Extract tags
    tag_pattern = r'"([^"]+)"'
    original_tags = re.findall(tag_pattern, tags_content)

    # Process tags
    processed_tags = []
    changes = []
    for tag in original_tags:
        cleaned = clean_tag(tag)
        # Apply mapping
        if cleaned in TAG_MAP:
            mapped = TAG_MAP[cleaned]
            if mapped is None:
                # Tag marked for removal
                changes.append(f"{cleaned} (removed)")
                continue
            if mapped != cleaned:
                changes.append(f"{cleaned} → {mapped}")
            processed_tags.append(mapped)
        elif cleaned != tag:
            # Just cleaned, no mapping
            changes.append(f"{tag} → {cleaned} (cleaned)")
            processed_tags.append(cleaned)
        else:
            processed_tags.append(cleaned)

    # Remove duplicates while preserving order
    seen = set()
    unique_tags = []
    duplicates = []
    for tag in processed_tags:
        tag_lower = tag.lower().strip()
        if tag_lower not in seen:
            seen.add(tag_lower)
            unique_tags.append(tag)
        else:
            duplicates.append(tag)
    if duplicates:
        changes.append(f"  ⚠ Removed duplicates: {', '.join(duplicates)}")

    if not changes:
        return None  # No changes needed

    # Reconstruct tags section; leave an empty array if every tag was removed
    if unique_tags:
        new_tags_content = '\n    "' + '",\n    "'.join(unique_tags) + '",\n'
    else:
        new_tags_content = ''
    new_tags_section = before_section + new_tags_content + after_section
    new_content = content[:match.start()] + new_tags_section + content[match.end():]

    # Write changes
    if not dry_run:
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(new_content)

    return {
        'changes': changes,
        'before_count': len(original_tags),
        'after_count': len(unique_tags),
        'duplicates_removed': len(duplicates)
    }
def consolidate_all_tags(content_dir, dry_run=False, create_backup_flag=True):
    """Main consolidation process"""
    print("=" * 70)
    print("TAG CONSOLIDATION SCRIPT")
    print("=" * 70)
    print()

    # Step 1: Analyze current state
    print("📊 Analyzing current tags...")
    tag_counts = analyze_tags(content_dir)
    print(f"   Total unique tags: {len(tag_counts)}")
    print(f"   Total tag instances: {sum(tag_counts.values())}")
    print()

    # Step 2: Create backup
    backup_dir = None
    if create_backup_flag and not dry_run:
        print("💾 Creating backup...")
        backup_dir = create_backup(content_dir)
        print()

    # Step 3: Process files
    if dry_run:
        print("🔍 DRY RUN - No changes will be made")
        print()
    else:
        print("🔧 Processing files...")
        print()

    files_changed = 0
    total_changes = 0
    total_duplicates = 0
    for md_file in sorted(content_dir.rglob("*.md")):
        result = process_file(md_file, dry_run)
        if result:
            rel_path = md_file.relative_to(content_dir.parent)
            print(f"📝 {rel_path}")
            print(f"   Tags: {result['before_count']} → {result['after_count']}")
            for change in result['changes']:
                print(change)
            print()
            files_changed += 1
            total_changes += len(result['changes'])
            total_duplicates += result['duplicates_removed']

    # Step 4: Final analysis
    if not dry_run:
        print("=" * 70)
        print("📊 Final analysis...")
        final_tag_counts = analyze_tags(content_dir)
        print(f"   Total unique tags: {len(final_tag_counts)}")
        print(f"   Total tag instances: {sum(final_tag_counts.values())}")
        print()

    # Step 5: Summary
    print("=" * 70)
    print("SUMMARY")
    print("=" * 70)
    print(f"Files processed: {files_changed}")
    print(f"Total changes: {total_changes}")
    print(f"Duplicates removed: {total_duplicates}")
    if not dry_run:
        print(f"Tag reduction: {len(tag_counts)} → {len(final_tag_counts)} "
              f"({len(tag_counts) - len(final_tag_counts)} tags removed)")

    if dry_run:
        print()
        print("⚠️  This was a DRY RUN. No files were modified.")
        print("   Run without --dry-run to apply changes.")
    else:
        print()
        print("✅ Tag consolidation complete!")
        if create_backup_flag:
            print(f"   Backup saved: {backup_dir}")
    print("=" * 70)
# ============================================================================
# MAIN ENTRY POINT
# ============================================================================
def main():
    parser = argparse.ArgumentParser(
        description='Consolidate and standardize tags across all markdown files',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    parser.add_argument('--dry-run', action='store_true',
                        help='Show what would change without making changes')
    parser.add_argument('--no-backup', action='store_true',
                        help='Skip creating backup before changes')
    args = parser.parse_args()

    content_dir = Path("content/project")
    if not content_dir.exists():
        print(f"❌ Error: {content_dir} does not exist")
        print("   Make sure you run this from the project root directory")
        sys.exit(1)

    try:
        consolidate_all_tags(
            content_dir,
            dry_run=args.dry_run,
            create_backup_flag=not args.no_backup
        )
    except KeyboardInterrupt:
        print("\n\n❌ Interrupted by user")
        sys.exit(1)
    except Exception as e:
        print(f"\n\n❌ Error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
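
# Example invocations (illustrative):
#   python3 consolidate_all_tags.py --dry-run    # preview changes only
#   python3 consolidate_all_tags.py              # apply, with backup
#   python3 consolidate_all_tags.py --no-backup  # apply, skip the backup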


@@ -0,0 +1,193 @@
# Tag Consolidation Map
# Format: old_tag -> new_tag
# This will be used to standardize tags across the website
# Remove trailing commas and spaces
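# ("*," is a wildcard rule: any tag with a trailing comma or space maps to
# its cleaned form; presumably handled specially by whatever consumes this file)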
*, -> *
# Capitalization fixes
AI -> ai
Unity -> unity
Unity, -> unity
Workshop -> workshop
Stable Diffusion -> stable diffusion
University of the Arts Berlin -> university of the arts berlin
Arduino -> arduino
Arduino, -> arduino
MTCNN -> mtcnn
ISD -> isd
GOFAI -> gofai
CNN -> cnn
LoRa -> lora
Linux -> linux
Linux, -> linux
Materialübung -> materialübung
SDR -> sdr
C# -> c#
C#, -> c#
# 3D printing consolidation
3D-Printing -> 3d printing
3D printing, -> 3d printing
additive manufacturing -> 3d printing
# Graphics/visuals
3D graphics -> 3d graphics
3D graphics, -> 3d graphics
# Remove trailing markers
1st person, -> 1st person
2 player, -> 2 player
3rd person, -> 3rd person
# Language fixes (keep English)
programmierung, -> programming
programmierung -> programming
mobile werkstatt -> mobile workshop
urbane intervention -> urban intervention
blitz -> lightning
antenne -> antenna
elektronik -> electronics
bildung -> education
# Concept consolidation
automatic -> automation
automatic, -> automation
automatic1111 -> stable diffusion
# Sustainability related
cradle-to-cradle -> sustainability
cradle-to-cradle, -> sustainability
environment, -> environment
sustainability, -> sustainability
# Data related
data collection -> data
data collection, -> data
data viz -> data visualization
data viz, -> data visualization
# Energy related
energy, -> energy
electricity -> energy
electricity, -> energy
solar -> energy
solar, -> energy
grid -> energy
grid, -> energy
# Collaboration
collaborative -> collaboration
collaborative, -> collaboration
collaborative recycling -> recycling
# Circular economy
circular -> sustainability
circular, -> sustainability
# Communication
communication, -> communication
blogging -> communication
blogging, -> communication
# Waste/recycling
waste -> recycling
recycling, -> recycling
precious plastic -> recycling
precious plastic, -> recycling
shredder -> recycling
shredder, -> recycling
plastics-as-waste -> recycling
plastics-as-material -> plastics
plastics, -> plastics
# Research/university
university of osnabrück, -> university of osnabrück
university of osnabrück -> university of osnabrück
university -> research
research, -> research
master thesis -> thesis
master thesis, -> thesis
# Making/fabrication
filastruder -> 3d printing
filastruder, -> 3d printing
filament -> 3d printing
filament, -> 3d printing
design for printing -> 3d printing
# Engineering
engineering, -> engineering
# Experiments
experiment, -> experiment
# Simulation
simulation -> simulation
simulation, -> simulation
# Scaling
scaling -> design
scaling, -> design
# Games
game -> interactive
game, -> interactive
1st person -> interactive
2 player -> interactive
3rd person -> interactive
cyberpunk -> speculative design
cyberpunk, -> speculative design
# Hosting/infrastructure
hosting -> infrastructure
hosting, -> infrastructure
decentral -> decentralized
decentral, -> decentralized
# Geographic
india -> india
india, -> india
iit kharagpur -> india
iit kharagpur, -> india
himalaya -> india
himalaya, -> india
# Programming languages (keep specific when meaningful)
rust -> programming
rust, -> programming
physics -> programming
physics, -> programming
ml -> machine learning
ml, -> machine learning
# Private/work (seems like metadata, maybe remove?)
privat -> work
private, -> work
# Remove overly specific person names - consolidate to topic
alison jaggar -> philosophy
elizabeth anderson -> philosophy
elsa dorlin -> philosophy
francois ewald -> philosophy
josé medina -> philosophy
judith butler -> philosophy
michael foucault -> philosophy
miranda fricker -> philosophy
geert lovink -> media theory
evgeny morozov -> media theory
lisa parks -> media theory
francis hunger -> media theory
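
The map file is line-oriented: comments start with "#", and each rule is
"old_tag -> new_tag". A minimal, hypothetical loader for this format (not
part of this commit; the function name and path are illustrative):

from pathlib import Path

def load_tag_map(path):
    """Parse 'old -> new' rules, skipping comments and blank lines."""
    mapping = {}
    for line in Path(path).read_text(encoding="utf-8").splitlines():
        line = line.strip()
        if not line or line.startswith("#"):
            continue
        # partition() splits on the first " -> "; sep is empty if absent
        old, sep, new = line.partition(" -> ")
        if sep:
            mapping[old.strip()] = new.strip()
    return mapping

Unlike TAG_MAP in the Python script, this text format has no way to express
tag removal (the None entries), so removals would still have to live in code.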