more translation, add unity for defences

Aron Petau 2025-10-13 17:20:06 +02:00
parent a41be821c1
commit 205c953752
768 changed files with 75229 additions and 21035 deletions

View file

@@ -1,11 +1,16 @@
#!/bin/bash
# Script to analyze and consolidate tags across the website
# Change to workspace root (one level up from scripts/)
cd "$(dirname "$0")/.."
echo "Analyzing all tags in the website..."
echo "===================================="
echo ""
# Extract all tags and count their usage
find content/project -name "*.md" -exec grep -A 20 "tags = \[" {} \; | \
grep -E '^\s*"' | \
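The tag-extraction pipeline above is cut off in this view. For reference, a rough Python equivalent (a sketch, not part of the commit, assuming TOML frontmatter with a tags = [...] array between +++ delimiters) could look like this:

#!/usr/bin/env python3
# Sketch only: count tag usage across project posts, assuming each post's
# frontmatter contains a TOML array of the form tags = ["a", "b", ...].
import re
from collections import Counter
from pathlib import Path

counts = Counter()
for md_file in Path("content/project").rglob("*.md"):
    text = md_file.read_text(encoding="utf-8")
    match = re.search(r"tags\s*=\s*\[(.*?)\]", text, re.DOTALL)
    if match:
        counts.update(re.findall(r'"([^"]+)"', match.group(1)))

for tag, count in counts.most_common():
    print(f"{count:4d}  {tag}")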

View file

@@ -0,0 +1,163 @@
#!/usr/bin/env python3
"""
Check which German translation files are actually translated vs. just stubs or copies.
This script compares English and German versions of project posts to identify:
1. Missing German files
2. Identical content (likely untranslated)
3. Stub files with minimal content
"""
import os
from pathlib import Path
import re
# Change to project root
os.chdir(Path(__file__).parent.parent)
def extract_content(filepath):
    """Extract the main content (excluding frontmatter) from a markdown file."""
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()
    # Split frontmatter from content
    parts = content.split('+++')
    if len(parts) >= 3:
        # Return content after second +++
        return parts[2].strip()
    return content.strip()
def extract_title_from_frontmatter(filepath):
    """Extract title from frontmatter."""
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()
    match = re.search(r'title\s*=\s*"([^"]+)"', content)
    if match:
        return match.group(1)
    return None
def similarity_ratio(text1, text2):
    """Calculate a simple similarity ratio between two texts."""
    if not text1 or not text2:
        return 0.0
    # Normalize whitespace
    text1_norm = ' '.join(text1.split())
    text2_norm = ' '.join(text2.split())
    if text1_norm == text2_norm:
        return 1.0
    # Simple character-based similarity
    longer = max(len(text1_norm), len(text2_norm))
    if longer == 0:
        return 1.0
    # Count matching characters
    matches = sum(c1 == c2 for c1, c2 in zip(text1_norm, text2_norm))
    return matches / longer
def main():
    project_dir = Path("content/project")
    missing_german = []
    untranslated = []
    stub_files = []
    properly_translated = []
    # Find all English index.md files
    for en_file in sorted(project_dir.glob("*/index.md")):
        project_folder = en_file.parent
        de_file = project_folder / "index.de.md"
        project_name = project_folder.name
        # Check if German file exists
        if not de_file.exists():
            missing_german.append(project_name)
            continue
        # Extract content
        en_content = extract_content(en_file)
        de_content = extract_content(de_file)
        en_title = extract_title_from_frontmatter(en_file)
        de_title = extract_title_from_frontmatter(de_file)
        # Check if content is identical or very similar
        similarity = similarity_ratio(en_content, de_content)
        # Check if German file is a stub (very short content)
        de_word_count = len(de_content.split())
        if similarity > 0.95:
            untranslated.append({
                'name': project_name,
                'similarity': similarity,
                'en_title': en_title,
                'de_title': de_title
            })
        elif de_word_count < 20:
            stub_files.append({
                'name': project_name,
                'word_count': de_word_count,
                'en_title': en_title,
                'de_title': de_title
            })
        else:
            properly_translated.append({
                'name': project_name,
                'similarity': similarity,
                'word_count': de_word_count
            })
    # Print results
    print("=" * 80)
    print("GERMAN TRANSLATION STATUS REPORT")
    print("=" * 80)
    print()
    print(f"📊 SUMMARY")
    print(f" Total projects: {len(list(project_dir.glob('*/index.md')))}")
    print(f" ✅ Properly translated: {len(properly_translated)}")
    print(f" ❌ Missing German file: {len(missing_german)}")
    print(f" ⚠️ Untranslated (identical content): {len(untranslated)}")
    print(f" ⚠️ Stub files (< 20 words): {len(stub_files)}")
    print()
    if missing_german:
        print("=" * 80)
        print("❌ MISSING GERMAN FILES")
        print("=" * 80)
        for project in missing_german:
            print(f"{project}")
        print()
    if untranslated:
        print("=" * 80)
        print("⚠️ UNTRANSLATED (Identical or near-identical to English)")
        print("=" * 80)
        for item in untranslated:
            print(f"{item['name']}")
            print(f" Similarity: {item['similarity']:.1%}")
            print(f" EN title: {item['en_title']}")
            print(f" DE title: {item['de_title']}")
        print()
    if stub_files:
        print("=" * 80)
        print("⚠️ STUB FILES (Less than 20 words)")
        print("=" * 80)
        for item in stub_files:
            print(f"{item['name']}")
            print(f" Word count: {item['word_count']}")
            print(f" EN title: {item['en_title']}")
            print(f" DE title: {item['de_title']}")
        print()
    print("=" * 80)
    print(f"Total needing translation: {len(missing_german) + len(untranslated) + len(stub_files)}")
    print("=" * 80)

if __name__ == "__main__":
    main()

View file

@@ -17,6 +17,7 @@ Options:
--no-backup Skip backup creation
"""
import re
import sys
import shutil
@@ -24,6 +25,10 @@ from pathlib import Path
from datetime import datetime
from collections import defaultdict, Counter
import argparse
import os
# Change to project root (one level up from scripts/)
os.chdir(Path(__file__).parent.parent)
# ============================================================================
# TAG MAPPING CONFIGURATION
@@ -166,7 +171,7 @@ def create_backup(content_dir):
    backup_dir = Path("backups") / f"tags_{timestamp}"
    backup_dir.mkdir(parents=True, exist_ok=True)
-   shutil.copytree(content_dir / "project", backup_dir / "project")
+   shutil.copytree(content_dir, backup_dir / "project")
    print(f"✓ Backup created: {backup_dir}")
    return backup_dir

View file

@@ -0,0 +1,35 @@
#!/bin/bash
# Script to convert old alert syntax to GitHub-style alerts
# Change to workspace root
cd "$(dirname "$0")/.."
# File to process
FILE="content/project/2021-03-01-philosophy/index.md"
if [ ! -f "$FILE" ]; then
echo "Error: File not found: $FILE"
exit 1
fi
# Create backup
cp "$FILE" "$FILE.backup"
# Convert {% alert(note=true) %} to > [!NOTE]
# and {% end %} to empty line (removing the closing tag)
# Use sed to do the transformation
sed -i.tmp '
# Convert opening alert tag
s/{% alert(note=true) %}/> [!NOTE]/g
# Remove closing tag
/^{% end %}$/d
' "$FILE"
# Remove temporary file
rm -f "$FILE.tmp"
echo "Conversion complete!"
echo "Backup saved as: $FILE.backup"

View file

@@ -1,3 +1,28 @@
# Artificial Intelligence consolidation
ai -> artificial intelligence
local AI -> artificial intelligence
machine learning -> artificial intelligence
neural nets -> artificial intelligence
llm -> artificial intelligence
gpt4all -> artificial intelligence
diffusionbee -> artificial intelligence
dreamfusion -> artificial intelligence
comfyui -> artificial intelligence
stable diffusion -> artificial intelligence
nlu -> artificial intelligence
nlp -> artificial intelligence
object recognition -> artificial intelligence
image recognition -> artificial intelligence
# Capitalization fixes for programming languages and tools
python -> Python
python, -> Python
jupyter notebook -> Jupyter Notebook
jupyter notebook, -> Jupyter Notebook
unity -> Unity
unity, -> Unity
javascript -> JavaScript
javascript, -> JavaScript
# Tag Consolidation Map
# Format: old_tag -> new_tag
# This will be used to standardize tags across the website
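For reference, a minimal sketch (not part of the commit) of how a mapping file in this "old_tag -> new_tag" format could be loaded; the filename scripts/tag_mapping.txt is an assumption:

#!/usr/bin/env python3
# Sketch only: parse 'old -> new' mapping lines, skipping comments and blanks.
from pathlib import Path

def load_tag_mapping(path):
    mapping = {}
    for line in Path(path).read_text(encoding="utf-8").splitlines():
        line = line.strip()
        if not line or line.startswith("#") or "->" not in line:
            continue
        old, new = (part.strip() for part in line.split("->", 1))
        mapping[old] = new
    return mapping

if __name__ == "__main__":
    mapping = load_tag_mapping("scripts/tag_mapping.txt")  # assumed path
    print(mapping.get("python"))            # Python
    print(mapping.get("machine learning"))  # artificial intelligence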