more translation

parent 2ce9ca50b5
commit a41be821c1

997 changed files with 33247 additions and 32490 deletions
scripts/analyze_tags.sh (Executable file, +25)

@@ -0,0 +1,25 @@
#!/bin/bash

# Script to analyze and consolidate tags across the website

echo "Analyzing all tags in the website..."
echo "===================================="
echo ""

# Extract all tags and count their usage
find content/project -name "*.md" -exec grep -A 20 "tags = \[" {} \; | \
    grep -E '^\s*"' | \
    sed 's/^[[:space:]]*//' | \
    sed 's/"//g' | \
    sed 's/,$//' | \
    sort | uniq -c | sort -rn

echo ""
echo "===================================="
echo "Total unique tags:"
find content/project -name "*.md" -exec grep -A 20 "tags = \[" {} \; | \
    grep -E '^\s*"' | \
    sed 's/^[[:space:]]*//' | \
    sed 's/"//g' | \
    sed 's/,$//' | \
    sort -u | wc -l
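
Both this script and the Python script below assume Zola-style TOML front matter in which every tag is quoted on its own line, roughly this shape (file name and tag values invented for illustration):

+++
title = "Example project"

[taxonomies]
tags = [
    "3d printing",
    "recycling",
]
+++

Note that grep -A 20 only scans the 20 lines after each `tags = [` match, so a tag list longer than that would be silently truncated in the counts.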

scripts/consolidate_all_tags.py (Normal file, +400)

@@ -0,0 +1,400 @@
#!/usr/bin/env python3
"""
Complete Tag Consolidation Script for Zola Website

This script performs comprehensive tag standardization:
1. Analyzes current tag usage
2. Applies consolidation mappings (capitalization, language, concepts)
3. Removes duplicate tags within files
4. Generates a report

Usage:
    python3 consolidate_all_tags.py [--dry-run] [--no-backup]

Options:
    --dry-run    Show what would change without making changes
    --no-backup  Skip backup creation (a backup is created by default)
"""

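# Example invocations, run from the project root (mirroring the usage above):
#   python3 scripts/consolidate_all_tags.py --dry-run    # preview changes only
#   python3 scripts/consolidate_all_tags.py              # apply, backup first
#   python3 scripts/consolidate_all_tags.py --no-backup  # apply without backup
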
import re
import sys
import shutil
import argparse
from pathlib import Path
from datetime import datetime
from collections import Counter

# ============================================================================
# TAG MAPPING CONFIGURATION
# ============================================================================

TAG_MAP = {
    # Capitalization fixes - everything lowercase
    "AI": "ai",
    "Unity": "unity",
    "Workshop": "workshop",
    "Stable Diffusion": "stable diffusion",
    "University of the Arts Berlin": "university of the arts berlin",
    "Arduino": "arduino",
    "Linux": "linux",
    "VLF": "vlf",
    "SDR": "sdr",
    "MTCNN": "mtcnn",
    "ISD": "isd",
    "GOFAI": "gofai",
    "CNN": "cnn",
    "LoRa": "lora",
    "Materialübung": "materialübung",
    "C#": "c#",

    # 3D printing consolidation
    "3D-Printing": "3d printing",
    "3D printing": "3d printing",
    "additive manufacturing": "3d printing",

    # Graphics
    "3D graphics": "3d graphics",

    # Language fixes (English only - no German)
    "programmierung": "programming",
    "mobile werkstatt": "mobile workshop",
    "urbane intervention": "urban intervention",
    "bildung": "education",
    "antenne": "antenna",
    "elektronik": "electronics",
    "blitz": "lightning",

    # Automation
    "automatic": "automation",
    "automatic1111": "stable diffusion",

    # Sustainability
    "cradle-to-cradle": "sustainability",
    "circular": "sustainability",

    # Data
    "data collection": "data",
    "data viz": "data visualization",

    # Energy
    "electricity": "energy",
    "solar": "energy",
    "grid": "energy",

    # Collaboration
    "collaborative": "collaboration",
    "collaborative recycling": "recycling",

    # Communication
    "blogging": "communication",

    # Waste/recycling
    "waste": "recycling",
    "precious plastic": "recycling",
    "shredder": "recycling",
    "plastics-as-waste": "recycling",
    "plastics-as-material": "plastics",

    # University/research
    "university": "research",
    "master thesis": "thesis",

    # Making/fabrication
    "filastruder": "3d printing",
    "filament": "3d printing",
    "design for printing": "3d printing",

    # Simulation
    "simulation": "simulation",

    # Scaling/design
    "scaling": "design",

    # Games/interactive
    "game": "interactive",
    "1st person": "interactive",
    "2 player": "interactive",
    "3rd person": "interactive",
    "cyberpunk": "speculative design",

    # Infrastructure
    "hosting": "infrastructure",
    "decentral": "decentralized",

    # Geographic
    "iit kharagpur": "india",
    "himalaya": "india",

    # Programming
    "rust": "programming",
    "physics": "programming",
    "ml": "machine learning",

    # Work/private
    "privat": "work",

    # Person names -> topics
    "alison jaggar": "philosophy",
    "elizabeth anderson": "philosophy",
    "elsa dorlin": "philosophy",
    "francois ewald": "philosophy",
    "josé medina": "philosophy",
    "judith butler": "philosophy",
    "michael foucault": "philosophy",
    "miranda fricker": "philosophy",
    "geert lovink": "media theory",
    "evgeny morozov": "media theory",
    "lisa parks": "media theory",
    "francis hunger": "media theory",

    # Remove entirely
    "TODO, unfinished": None,
}

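# How the map is read (illustrative lookups, not executed here):
#   TAG_MAP["3D-Printing"]      -> "3d printing"   (tag renamed in place)
#   TAG_MAP["TODO, unfinished"] -> None            (tag dropped entirely)
#   "sdr" has no entry (already lowercase), so it passes through unchanged
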
# ============================================================================
# HELPER FUNCTIONS
# ============================================================================

def clean_tag(tag):
    """Remove trailing commas, spaces, and normalize"""
    return tag.strip().rstrip(',').strip()

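# For instance (hypothetical inputs):
#   clean_tag('Unity, ')  -> 'Unity'
#   clean_tag(' waste ,') -> 'waste'
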
def create_backup(content_dir):
    """Create timestamped backup of content directory"""
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    backup_dir = Path("backups") / f"tags_{timestamp}"
    backup_dir.mkdir(parents=True, exist_ok=True)

    # content_dir is already content/project (see main), so copy it as-is;
    # content_dir / "project" would point one directory level too deep
    shutil.copytree(content_dir, backup_dir / content_dir.name)
    print(f"✓ Backup created: {backup_dir}")
    return backup_dir


def analyze_tags(content_dir):
    """Analyze current tag usage across all files"""
    all_tags = []

    for md_file in content_dir.rglob("*.md"):
        with open(md_file, 'r', encoding='utf-8') as f:
            content = f.read()

        tags_pattern = r'tags = \[(.*?)\]'
        match = re.search(tags_pattern, content, re.DOTALL)
        if match:
            tag_pattern = r'"([^"]+)"'
            tags = re.findall(tag_pattern, match.group(1))
            all_tags.extend([clean_tag(t) for t in tags])

    return Counter(all_tags)

# ============================================================================
|
||||
# MAIN PROCESSING FUNCTIONS
|
||||
# ============================================================================
|
||||
|
||||
def process_file(filepath, dry_run=False):
|
||||
"""
|
||||
Process a single markdown file:
|
||||
1. Apply tag mappings
|
||||
2. Remove duplicates
|
||||
3. Clean formatting
|
||||
"""
|
||||
with open(filepath, 'r', encoding='utf-8') as f:
|
||||
content = f.read()
|
||||
|
||||
# Find tags section
|
||||
tags_pattern = r'(tags = \[)(.*?)(\])'
|
||||
match = re.search(tags_pattern, content, re.DOTALL)
|
||||
|
||||
if not match:
|
||||
return None
|
||||
|
||||
before_section = match.group(1)
|
||||
tags_content = match.group(2)
|
||||
after_section = match.group(3)
|
||||
|
||||
# Extract tags
|
||||
tag_pattern = r'"([^"]+)"'
|
||||
original_tags = re.findall(tag_pattern, tags_content)
|
||||
|
||||
# Process tags
|
||||
processed_tags = []
|
||||
changes = []
|
||||
|
||||
for tag in original_tags:
|
||||
cleaned = clean_tag(tag)
|
||||
|
||||
# Apply mapping
|
||||
if cleaned in TAG_MAP:
|
||||
mapped = TAG_MAP[cleaned]
|
||||
if mapped is None:
|
||||
# Tag marked for removal
|
||||
changes.append(f" ✗ {cleaned} (removed)")
|
||||
continue
|
||||
elif mapped != cleaned:
|
||||
changes.append(f" → {cleaned} → {mapped}")
|
||||
processed_tags.append(mapped)
|
||||
else:
|
||||
processed_tags.append(mapped)
|
||||
elif cleaned != tag:
|
||||
# Just cleaned, no mapping
|
||||
changes.append(f" ✓ {tag} → {cleaned} (cleaned)")
|
||||
processed_tags.append(cleaned)
|
||||
else:
|
||||
processed_tags.append(cleaned)
|
||||
|
||||
    # Remove duplicates while preserving order
    seen = set()
    unique_tags = []
    duplicates = []

    for tag in processed_tags:
        tag_lower = tag.lower().strip()
        if tag_lower not in seen:
            seen.add(tag_lower)
            unique_tags.append(tag)
        else:
            duplicates.append(tag)

    if duplicates:
        changes.append(f"   ⚠ Removed duplicates: {', '.join(duplicates)}")
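    # E.g. processed_tags ["unity", "ai", "unity"] (the second "unity" coming
    # from a mapped "Unity") yields unique_tags ["unity", "ai"]; comparison is
    # case-insensitive and the original order is preserved.
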
    if not changes:
        return None  # No changes needed

    # Reconstruct tags section; guard the empty case so removing every tag
    # yields tags = [] rather than a list containing one empty string
    if unique_tags:
        new_tags_content = '\n    "' + '",\n    "'.join(unique_tags) + '",\n'
    else:
        new_tags_content = ''
    new_tags_section = before_section + new_tags_content + after_section
    new_content = content[:match.start()] + new_tags_section + content[match.end():]

    # Write changes
    if not dry_run:
        with open(filepath, 'w', encoding='utf-8') as f:
            f.write(new_content)

    return {
        'changes': changes,
        'before_count': len(original_tags),
        'after_count': len(unique_tags),
        'duplicates_removed': len(duplicates)
    }

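# End-to-end sketch of process_file on one front-matter block (values invented):
#   before: tags = ["Unity,", "programmierung", "unity"]
#   after:  tags = [
#               "unity",
#               "programming",
#           ]
# "Unity," is cleaned to "Unity" and mapped to "unity", "programmierung" is
# mapped to "programming", and the now-duplicate "unity" is dropped.
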
def consolidate_all_tags(content_dir, dry_run=False, create_backup_flag=True):
    """Main consolidation process"""

    print("=" * 70)
    print("TAG CONSOLIDATION SCRIPT")
    print("=" * 70)
    print()

    # Step 1: Analyze current state
    print("📊 Analyzing current tags...")
    tag_counts = analyze_tags(content_dir)
    print(f"   Total unique tags: {len(tag_counts)}")
    print(f"   Total tag instances: {sum(tag_counts.values())}")
    print()

    # Step 2: Create backup
    if create_backup_flag and not dry_run:
        print("💾 Creating backup...")
        backup_dir = create_backup(content_dir)
        print()

    # Step 3: Process files
    if dry_run:
        print("🔍 DRY RUN - No changes will be made")
        print()
    else:
        print("🔧 Processing files...")
        print()

    files_changed = 0
    total_changes = 0
    total_duplicates = 0

    for md_file in sorted(content_dir.rglob("*.md")):
        result = process_file(md_file, dry_run)
        if result:
            rel_path = md_file.relative_to(content_dir.parent)
            print(f"📝 {rel_path}")
            print(f"   Tags: {result['before_count']} → {result['after_count']}")
            for change in result['changes']:
                print(change)
            print()

            files_changed += 1
            total_changes += len(result['changes'])
            total_duplicates += result['duplicates_removed']

    # Step 4: Final analysis
    if not dry_run:
        print("=" * 70)
        print("📊 Final analysis...")
        final_tag_counts = analyze_tags(content_dir)
        print(f"   Total unique tags: {len(final_tag_counts)}")
        print(f"   Total tag instances: {sum(final_tag_counts.values())}")
        print()

    # Step 5: Summary
    print("=" * 70)
    print("SUMMARY")
    print("=" * 70)
    print(f"Files changed: {files_changed}")
    print(f"Total changes: {total_changes}")
    print(f"Duplicates removed: {total_duplicates}")

    if not dry_run:
        print(f"Tag reduction: {len(tag_counts)} → {len(final_tag_counts)} "
              f"({len(tag_counts) - len(final_tag_counts)} tags removed)")

    if dry_run:
        print()
        print("⚠️  This was a DRY RUN. No files were modified.")
        print("   Run without --dry-run to apply changes.")
    else:
        print()
        print("✅ Tag consolidation complete!")
        if create_backup_flag:
            print(f"   Backup saved: {backup_dir}")

    print("=" * 70)

# ============================================================================
# MAIN ENTRY POINT
# ============================================================================

def main():
    parser = argparse.ArgumentParser(
        description='Consolidate and standardize tags across all markdown files',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__
    )
    parser.add_argument('--dry-run', action='store_true',
                        help='Show what would change without making changes')
    parser.add_argument('--no-backup', action='store_true',
                        help='Skip creating backup before changes')

    args = parser.parse_args()

    content_dir = Path("content/project")
    if not content_dir.exists():
        print(f"❌ Error: {content_dir} does not exist")
        print("   Make sure you run this from the project root directory")
        sys.exit(1)

    try:
        consolidate_all_tags(
            content_dir,
            dry_run=args.dry_run,
            create_backup_flag=not args.no_backup
        )
    except KeyboardInterrupt:
        print("\n\n❌ Interrupted by user")
        sys.exit(1)
    except Exception as e:
        print(f"\n\n❌ Error: {e}")
        import traceback
        traceback.print_exc()
        sys.exit(1)


if __name__ == "__main__":
    main()
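
With the print statements above, a changed file is reported roughly like this during a run (path, counts, and tags invented for illustration):

📝 project/example-project.md
   Tags: 4 → 2
   → Unity → unity
   ✗ TODO, unfinished (removed)
   ⚠ Removed duplicates: unity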

scripts/tag_consolidation_map.txt (Normal file, +193)

@@ -0,0 +1,193 @@
# Tag Consolidation Map
# Format: old_tag -> new_tag
# This will be used to standardize tags across the website

# Remove trailing commas and spaces
*, -> *

# Capitalization fixes
AI -> ai
Unity -> unity
Unity, -> unity
Workshop -> workshop
Stable Diffusion -> stable diffusion
University of the Arts Berlin -> university of the arts berlin
Arduino -> arduino
Arduino, -> arduino
MTCNN -> mtcnn
ISD -> isd
GOFAI -> gofai
CNN -> cnn
LoRa -> lora
Linux -> linux
Linux, -> linux
Materialübung -> materialübung
SDR -> sdr
C# -> c#
C#, -> c#

# 3D printing consolidation
3D-Printing -> 3d printing
3D printing, -> 3d printing
additive manufacturing -> 3d printing

# Graphics/visuals
3D graphics -> 3d graphics
3D graphics, -> 3d graphics

# Remove trailing markers
1st person, -> 1st person
2 player, -> 2 player
3rd person, -> 3rd person

# Language fixes (keep English)
programmierung, -> programming
programmierung -> programming
mobile werkstatt -> mobile workshop
urbane intervention -> urban intervention
blitz -> lightning
antenne -> antenna
elektronik -> electronics
bildung -> education

# Concept consolidation
automatic -> automation
automatic, -> automation
automatic1111 -> stable diffusion

# Sustainability related
cradle-to-cradle -> sustainability
cradle-to-cradle, -> sustainability
environment, -> environment
sustainability, -> sustainability

# Data related
data collection -> data
data collection, -> data
data viz -> data visualization
data viz, -> data visualization

# Energy related
energy, -> energy
electricity -> energy
electricity, -> energy
solar -> energy
solar, -> energy
grid -> energy
grid, -> energy

# Collaboration
collaborative -> collaboration
collaborative, -> collaboration
collaborative recycling -> recycling

# Circular economy
circular -> sustainability
circular, -> sustainability

# Communication
communication, -> communication
blogging -> communication
blogging, -> communication

# Waste/recycling
waste -> recycling
recycling, -> recycling
precious plastic -> recycling
precious plastic, -> recycling
shredder -> recycling
shredder, -> recycling
plastics-as-waste -> recycling
plastics-as-material -> plastics
plastics, -> plastics

# Research/university
university of osnabrück, -> university of osnabrück
university of osnabrück -> university of osnabrück
university -> research
research, -> research
master thesis -> thesis
master thesis, -> thesis

# Making/fabrication
filastruder -> 3d printing
filastruder, -> 3d printing
filament -> 3d printing
filament, -> 3d printing
design for printing -> 3d printing

# Engineering
engineering, -> engineering

# Experiments
experiment, -> experiment

# Simulation
simulation -> simulation
simulation, -> simulation

# Scaling
scaling -> design
scaling, -> design

# Games
game -> interactive
game, -> interactive
1st person -> interactive
2 player -> interactive
3rd person -> interactive
cyberpunk -> speculative design
cyberpunk, -> speculative design

# Hosting/infrastructure
hosting -> infrastructure
hosting, -> infrastructure
decentral -> decentralized
decentral, -> decentralized

# Geographic
india -> india
india, -> india
iit kharagpur -> india
iit kharagpur, -> india
himalaya -> india
himalaya, -> india

# Programming languages (keep specific when meaningful)
rust -> programming
rust, -> programming
physics -> programming
physics, -> programming
ml -> machine learning
ml, -> machine learning

# Private/work (seems like metadata, maybe remove?)
privat -> work
private, -> work

# Remove overly specific person names - consolidate to topic
alison jaggar -> philosophy
elizabeth anderson -> philosophy
elsa dorlin -> philosophy
francois ewald -> philosophy
josé medina -> philosophy
judith butler -> philosophy
michael foucault -> philosophy
miranda fricker -> philosophy
geert lovink -> media theory
evgeny morozov -> media theory
lisa parks -> media theory
francis hunger -> media theory