| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139 |
- import hashlib
- import json
- import os
- import sys
- import anthropic
- import frontmatter
- import yaml
# The Anthropic client needs credentials, so stop with a non-zero exit
# status as soon as the key turns out to be missing or empty.
ANTHROPIC_API_KEY = os.environ.get('ANTHROPIC_API_KEY')
if not ANTHROPIC_API_KEY:
    print('Error: ANTHROPIC_API_KEY environment variable not set')
    sys.exit(1)

# Shared API client used for every translation request.
client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)

# Documentation root; the hash cache file is kept directly inside it.
DOCS_DIR = 'docs/'
CACHE_FILE = os.path.join(DOCS_DIR, 'translation_cache.json')

# Supported languages: language code -> English name of the language
LANGUAGES = {'fr': 'French', 'zh-Hans': 'Simplified Chinese'}
def get_file_hash(filepath):
    """Return the hexadecimal MD5 digest of a file's raw bytes.

    The file is opened in binary mode, so the digest reflects the exact
    on-disk content without any newline or encoding translation.
    """
    hasher = hashlib.md5()
    with open(filepath, 'rb') as stream:
        hasher.update(stream.read())
    return hasher.hexdigest()
def load_file_hashes():
    """Load the file hashes saved by a previous run.

    Returns:
        The mapping decoded from ``CACHE_FILE``. An empty dict is returned
        when the cache file does not exist, is not valid JSON, or does not
        contain a JSON object.
    """
    # Open directly instead of checking os.path.exists() first: that avoids
    # the window in which the file could vanish between check and open.
    try:
        with open(CACHE_FILE, 'r', encoding='utf-8') as f:
            hashes = json.load(f)
    except FileNotFoundError:
        # No cache yet, so no file has a recorded hash.
        return {}
    except json.JSONDecodeError as err:
        # A damaged cache (for example one cut short by an interrupted
        # write) should not abort the run; start over with no known hashes.
        print(f'Warning: ignoring unreadable cache file {CACHE_FILE}: {err}')
        return {}

    if not isinstance(hashes, dict):
        # Callers look hashes up by file path, so only a JSON object is usable.
        print(f'Warning: ignoring cache file {CACHE_FILE}: expected a JSON object')
        return {}
    return hashes
def save_file_hashes(hashes):
    """Write the given hash mapping to ``CACHE_FILE`` as JSON.

    Any existing cache file is overwritten.
    """
    with open(CACHE_FILE, 'w') as cache:
        cache.write(json.dumps(hashes))
def get_translation_path(source_path, lang):
    """Return the path of the translated counterpart of a source file.

    The location of ``source_path`` relative to ``docs/modules`` is re-rooted
    under ``docs/i18n/<lang>/docusaurus-plugin-content-docs/current``.
    """
    suffix = os.path.relpath(source_path, 'docs/modules')
    template = 'docs/i18n/{}/docusaurus-plugin-content-docs/current/{}'
    return template.format(lang, suffix)
def translate_content(content, target_lang):
    """Translate content using Anthropic's Claude.

    Args:
        content: Text to translate; it is sent as the user message.
        target_lang: Name of the target language, interpolated into the
            system prompt (for example ``'French'``).

    Returns:
        The text of the first content block of the model's reply. When the
        reply was cut off by the token limit a warning is printed and the
        partial text is still returned.
    """
    max_tokens = 4096
    system_prompt = f'You are a professional translator. Translate the following content into {target_lang}. Preserve all Markdown formatting, code blocks, and front matter. Keep any {{% jsx %}} tags and similar intact. Do not translate code examples, URLs, or technical terms.'
    message = client.messages.create(
        model='claude-3-opus-20240229',
        max_tokens=max_tokens,
        # temperature 0 keeps repeated translations of the same text stable.
        temperature=0,
        system=system_prompt,
        messages=[
            {'role': 'user', 'content': f'Please translate this content:\n\n{content}'}
        ],
    )
    # A reply that stops because of the token cap is an incomplete document;
    # make that visible instead of writing it out as if it were whole.
    if getattr(message, 'stop_reason', None) == 'max_tokens':
        print(
            f'Warning: translation into {target_lang} reached the '
            f'{max_tokens}-token limit and may be truncated'
        )
    return message.content[0].text
def process_file(source_path, lang):
    """Translate a single Markdown file and write the translated copy.

    Files whose name does not end in ``.md`` or ``.mdx`` are ignored. When the
    source starts with ``---`` its front matter is parsed, kept untranslated,
    and re-emitted ahead of the translated body.

    Args:
        source_path: Path of the source document.
        lang: Language code; must be a key of ``LANGUAGES``.

    Raises:
        KeyError: If ``lang`` is not a key of ``LANGUAGES``.
    """
    # Skip non-markdown files
    if not source_path.endswith(('.md', '.mdx')):
        return

    translation_path = get_translation_path(source_path, lang)

    # Read source content
    with open(source_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Parse frontmatter if exists
    has_frontmatter = content.startswith('---')
    if has_frontmatter:
        post = frontmatter.loads(content)
        metadata = post.metadata
        content_without_frontmatter = post.content
    else:
        metadata = {}
        content_without_frontmatter = content

    # Translate the content
    print('translating...', source_path, lang)
    translated_content = translate_content(content_without_frontmatter, LANGUAGES[lang])
    print('translation done')

    # Reconstruct the file with frontmatter if it existed
    if has_frontmatter:
        final_content = ''.join(
            [
                '---\n',
                # sort_keys=False keeps the front matter keys in their source
                # order; yaml.dump would otherwise sort them alphabetically.
                yaml.dump(metadata, allow_unicode=True, sort_keys=False),
                '---\n\n',
                translated_content,
            ]
        )
    else:
        final_content = translated_content

    # Create the target directory only once there is something to write, so
    # a failed read or translation leaves no empty directories behind.
    os.makedirs(os.path.dirname(translation_path), exist_ok=True)

    # Write the translated content
    with open(translation_path, 'w', encoding='utf-8') as f:
        f.write(final_content)

    print(f'Updated translation for {source_path} in {lang}')
def main():
    """Translate every new or modified Markdown file under ``docs/modules``.

    Each ``.md``/``.mdx`` file is hashed and compared with the hash loaded
    from the cache. Files with no cached hash or a different hash are
    translated into every language in ``LANGUAGES``. The hashes collected
    during the walk are saved once the walk has finished.
    """
    previous_hashes = load_file_hashes()
    current_hashes = {}

    # Walk through all files in docs/modules
    for root, _, files in os.walk('docs/modules'):
        for file in files:
            if not file.endswith(('.md', '.mdx')):
                continue

            filepath = os.path.join(root, file)
            current_hash = get_file_hash(filepath)
            current_hashes[filepath] = current_hash

            # Unchanged since the previous run: nothing to translate.
            if (
                filepath in previous_hashes
                and previous_hashes[filepath] == current_hash
            ):
                continue

            print(f'Change detected in {filepath}')
            for lang in LANGUAGES:
                process_file(filepath, lang)

    print('all files up to date, saving hashes')
    save_file_hashes(current_hashes)


# Run the updater only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|