More translations; add Unity for defences
This commit is contained in:
parent
a41be821c1
commit
205c953752
768 changed files with 75229 additions and 21035 deletions
|
|
@ -1,11 +1,16 @@
|
|||
|
||||
#!/bin/bash
|
||||
|
||||
# Script to analyze and consolidate tags across the website
|
||||
|
||||
# Change to workspace root (one level up from scripts/)
|
||||
cd "$(dirname "$0")/.."
|
||||
|
||||
echo "Analyzing all tags in the website..."
|
||||
echo "===================================="
|
||||
echo ""
|
||||
|
||||
|
||||
# Extract all tags and count their usage
|
||||
find content/project -name "*.md" -exec grep -A 20 "tags = \[" {} \; | \
|
||||
grep -E '^\s*"' | \
|
||||
|
|
|
|||
163
scripts/check_translations.py
Normal file
163
scripts/check_translations.py
Normal file
|
|
@ -0,0 +1,163 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Check which German translation files are actually translated vs. just stubs or copies.
|
||||
|
||||
This script compares English and German versions of project posts to identify:
|
||||
1. Missing German files
|
||||
2. Identical content (likely untranslated)
|
||||
3. Stub files with minimal content
|
||||
"""
|
||||
|
||||
import difflib
import os
import re

from pathlib import Path
|
||||
|
||||
# Change to project root
|
||||
os.chdir(Path(__file__).parent.parent)
|
||||
|
||||
def extract_content(filepath):
    """Return the main markdown body of *filepath*, without TOML frontmatter.

    Frontmatter is delimited by ``+++`` fences at the top of the file;
    everything after the closing fence is the body.  Falls back to the
    whole (stripped) file when no fence pair is present.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        content = f.read()

    # Split at most twice so a literal '+++' inside the body does not
    # truncate the result (a plain split('+++') would return only the
    # text between the closing fence and the next occurrence).
    parts = content.split('+++', 2)
    if len(parts) >= 3:
        # parts[2] is everything after the closing frontmatter fence.
        return parts[2].strip()
    return content.strip()
|
||||
|
||||
def extract_title_from_frontmatter(filepath):
    """Return the ``title = "..."`` value from the file's frontmatter, or None."""
    text = Path(filepath).read_text(encoding='utf-8')
    found = re.search(r'title\s*=\s*"([^"]+)"', text)
    return found.group(1) if found else None
|
||||
|
||||
def similarity_ratio(text1, text2):
    """Return a similarity score in [0.0, 1.0] between two texts.

    Whitespace is normalized before comparing.  Either text being empty
    yields 0.0; identical normalized texts yield 1.0.

    Uses difflib.SequenceMatcher rather than the previous positional
    character comparison: with ``zip``, a single insertion or deletion
    near the start shifted every subsequent character and drove the
    score toward 0 even for near-identical texts, which defeated the
    > 0.95 "untranslated" check in main().
    """
    if not text1 or not text2:
        return 0.0

    # Normalize whitespace so formatting differences don't count.
    a = ' '.join(text1.split())
    b = ' '.join(text2.split())

    if a == b:
        return 1.0

    # Ratio of matching subsequences; robust to insertions/deletions.
    return difflib.SequenceMatcher(None, a, b).ratio()
|
||||
|
||||
def main():
    """Report the German translation status of every project post.

    Compares each ``content/project/*/index.md`` with its
    ``index.de.md`` sibling and buckets the project as missing,
    untranslated (near-identical content), a stub (< 20 words), or
    properly translated, then prints a summary report to stdout.
    """
    project_dir = Path("content/project")

    missing_german = []
    untranslated = []
    stub_files = []
    properly_translated = []

    # Find all English index.md files
    for en_file in sorted(project_dir.glob("*/index.md")):
        project_folder = en_file.parent
        de_file = project_folder / "index.de.md"

        project_name = project_folder.name

        # Check if German file exists
        if not de_file.exists():
            missing_german.append(project_name)
            continue

        # Extract body text and frontmatter titles from both languages.
        en_content = extract_content(en_file)
        de_content = extract_content(de_file)
        en_title = extract_title_from_frontmatter(en_file)
        de_title = extract_title_from_frontmatter(de_file)

        # Near-identical bodies mean the file was copied, not translated.
        similarity = similarity_ratio(en_content, de_content)

        # Very short German content indicates a placeholder stub.
        de_word_count = len(de_content.split())

        if similarity > 0.95:
            untranslated.append({
                'name': project_name,
                'similarity': similarity,
                'en_title': en_title,
                'de_title': de_title
            })
        elif de_word_count < 20:
            stub_files.append({
                'name': project_name,
                'word_count': de_word_count,
                'en_title': en_title,
                'de_title': de_title
            })
        else:
            properly_translated.append({
                'name': project_name,
                'similarity': similarity,
                'word_count': de_word_count
            })

    # Every project fell into exactly one bucket, so the total can be
    # derived from the bucket sizes instead of re-globbing the tree.
    total_projects = (len(missing_german) + len(untranslated)
                      + len(stub_files) + len(properly_translated))

    # Print results
    print("=" * 80)
    print("GERMAN TRANSLATION STATUS REPORT")
    print("=" * 80)
    print()

    # Plain string below where the original was an f-string without
    # placeholders (ruff F541); output is unchanged.
    print("📊 SUMMARY")
    print(f" Total projects: {total_projects}")
    print(f" ✅ Properly translated: {len(properly_translated)}")
    print(f" ❌ Missing German file: {len(missing_german)}")
    print(f" ⚠️ Untranslated (identical content): {len(untranslated)}")
    print(f" ⚠️ Stub files (< 20 words): {len(stub_files)}")
    print()

    if missing_german:
        print("=" * 80)
        print("❌ MISSING GERMAN FILES")
        print("=" * 80)
        for project in missing_german:
            print(f" • {project}")
        print()

    if untranslated:
        print("=" * 80)
        print("⚠️ UNTRANSLATED (Identical or near-identical to English)")
        print("=" * 80)
        for item in untranslated:
            print(f" • {item['name']}")
            print(f" Similarity: {item['similarity']:.1%}")
            print(f" EN title: {item['en_title']}")
            print(f" DE title: {item['de_title']}")
            print()

    if stub_files:
        print("=" * 80)
        print("⚠️ STUB FILES (Less than 20 words)")
        print("=" * 80)
        for item in stub_files:
            print(f" • {item['name']}")
            print(f" Word count: {item['word_count']}")
            print(f" EN title: {item['en_title']}")
            print(f" DE title: {item['de_title']}")
            print()

    print("=" * 80)
    print(f"Total needing translation: {len(missing_german) + len(untranslated) + len(stub_files)}")
    print("=" * 80)


if __name__ == "__main__":
    main()
|
||||
|
|
@ -17,6 +17,7 @@ Options:
|
|||
--no-backup Skip backup creation
|
||||
"""
|
||||
|
||||
|
||||
import re
|
||||
import sys
|
||||
import shutil
|
||||
|
|
@ -24,6 +25,10 @@ from pathlib import Path
|
|||
from datetime import datetime
|
||||
from collections import defaultdict, Counter
|
||||
import argparse
|
||||
import os
|
||||
|
||||
# Change to project root (one level up from scripts/)
|
||||
os.chdir(Path(__file__).parent.parent)
|
||||
|
||||
# ============================================================================
|
||||
# TAG MAPPING CONFIGURATION
|
||||
|
|
@ -166,7 +171,7 @@ def create_backup(content_dir):
|
|||
backup_dir = Path("backups") / f"tags_{timestamp}"
|
||||
backup_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
shutil.copytree(content_dir / "project", backup_dir / "project")
|
||||
shutil.copytree(content_dir, backup_dir / "project")
|
||||
print(f"✓ Backup created: {backup_dir}")
|
||||
return backup_dir
|
||||
|
||||
|
|
|
|||
35
scripts/convert_alerts_to_github.sh
Executable file
35
scripts/convert_alerts_to_github.sh
Executable file
|
|
@ -0,0 +1,35 @@
|
|||
#!/bin/bash
# Convert legacy {% alert %} shortcodes to GitHub-style alert blockquotes.

# Run from the workspace root (this script lives in scripts/).
cd "$(dirname "$0")/.."

# Only this one post uses the old alert syntax.
TARGET="content/project/2021-03-01-philosophy/index.md"

if [ ! -f "$TARGET" ]; then
    echo "Error: File not found: $TARGET"
    exit 1
fi

# Keep a restorable copy before editing in place.
cp "$TARGET" "$TARGET.backup"

# Rewrite the opening tag and drop the closing tag in one sed pass.
sed -i.tmp '
# Convert opening alert tag
s/{% alert(note=true) %}/> [!NOTE]/g

# Remove closing tag
/^{% end %}$/d
' "$TARGET"

# sed -i.tmp leaves its own backup file; the .backup copy above supersedes it.
rm -f "$TARGET.tmp"

echo "Conversion complete!"
echo "Backup saved as: $TARGET.backup"
|
||||
|
|
@ -1,3 +1,28 @@
|
|||
# Artificial Intelligence consolidation
|
||||
ai -> artificial intelligence
|
||||
local AI -> artificial intelligence
|
||||
machine learning -> artificial intelligence
|
||||
neural nets -> artificial intelligence
|
||||
llm -> artificial intelligence
|
||||
gpt4all -> artificial intelligence
|
||||
diffusionbee -> artificial intelligence
|
||||
dreamfusion -> artificial intelligence
|
||||
comfyui -> artificial intelligence
|
||||
stable diffusion -> artificial intelligence
|
||||
nlu -> artificial intelligence
|
||||
nlp -> artificial intelligence
|
||||
object recognition -> artificial intelligence
|
||||
image recognition -> artificial intelligence
|
||||
|
||||
# Capitalization fixes for programming languages and tools
|
||||
python -> Python
|
||||
python, -> Python
|
||||
jupyter notebook -> Jupyter Notebook
|
||||
jupyter notebook, -> Jupyter Notebook
|
||||
unity -> Unity
|
||||
unity, -> Unity
|
||||
javascript -> JavaScript
|
||||
javascript, -> JavaScript
|
||||
# Tag Consolidation Map
|
||||
# Format: old_tag -> new_tag
|
||||
# This will be used to standardize tags across the website
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue