translation_updater.py 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139
  1. import hashlib
  2. import json
  3. import os
  4. import sys
  5. import anthropic
  6. import frontmatter
  7. import yaml
  8. ANTHROPIC_API_KEY = os.getenv('ANTHROPIC_API_KEY')
  9. if not ANTHROPIC_API_KEY:
  10. print('Error: ANTHROPIC_API_KEY environment variable not set')
  11. sys.exit(1)
  12. client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
  13. DOCS_DIR = 'docs/'
  14. CACHE_FILE = os.path.join(DOCS_DIR, 'translation_cache.json')
  15. # Supported languages and their codes
  16. LANGUAGES = {'fr': 'French', 'zh-Hans': 'Simplified Chinese'}
  17. def get_file_hash(filepath):
  18. """Calculate MD5 hash of a file."""
  19. with open(filepath, 'rb') as f:
  20. return hashlib.md5(f.read()).hexdigest()
  21. def load_file_hashes():
  22. """Load previously saved file hashes."""
  23. if os.path.exists(CACHE_FILE):
  24. with open(CACHE_FILE, 'r') as f:
  25. return json.load(f)
  26. return {}
  27. def save_file_hashes(hashes):
  28. """Save current file hashes."""
  29. with open(CACHE_FILE, 'w') as f:
  30. json.dump(hashes, f)
  31. def get_translation_path(source_path, lang):
  32. """Get the corresponding translation file path for a source file."""
  33. relative_path = os.path.relpath(source_path, 'docs/modules')
  34. return f'docs/i18n/{lang}/docusaurus-plugin-content-docs/current/{relative_path}'
  35. def translate_content(content, target_lang):
  36. """Translate content using Anthropic's Claude."""
  37. system_prompt = f'You are a professional translator. Translate the following content into {target_lang}. Preserve all Markdown formatting, code blocks, and front matter. Keep any {{% jsx %}} tags and similar intact. Do not translate code examples, URLs, or technical terms.'
  38. message = client.messages.create(
  39. model='claude-3-opus-20240229',
  40. max_tokens=4096,
  41. temperature=0,
  42. system=system_prompt,
  43. messages=[
  44. {'role': 'user', 'content': f'Please translate this content:\n\n{content}'}
  45. ],
  46. )
  47. return message.content[0].text
  48. def process_file(source_path, lang):
  49. """Process a single file for translation."""
  50. # Skip non-markdown files
  51. if not source_path.endswith(('.md', '.mdx')):
  52. return
  53. translation_path = get_translation_path(source_path, lang)
  54. os.makedirs(os.path.dirname(translation_path), exist_ok=True)
  55. # Read source content
  56. with open(source_path, 'r', encoding='utf-8') as f:
  57. content = f.read()
  58. # Parse frontmatter if exists
  59. has_frontmatter = content.startswith('---')
  60. if has_frontmatter:
  61. post = frontmatter.loads(content)
  62. metadata = post.metadata
  63. content_without_frontmatter = post.content
  64. else:
  65. metadata = {}
  66. content_without_frontmatter = content
  67. # Translate the content
  68. print('translating...', source_path, lang)
  69. translated_content = translate_content(content_without_frontmatter, LANGUAGES[lang])
  70. print('translation done')
  71. # Reconstruct the file with frontmatter if it existed
  72. if has_frontmatter:
  73. final_content = '---\n'
  74. final_content += yaml.dump(metadata, allow_unicode=True)
  75. final_content += '---\n\n'
  76. final_content += translated_content
  77. else:
  78. final_content = translated_content
  79. # Write the translated content
  80. with open(translation_path, 'w', encoding='utf-8') as f:
  81. f.write(final_content)
  82. print(f'Updated translation for {source_path} in {lang}')
  83. def main():
  84. previous_hashes = load_file_hashes()
  85. current_hashes = {}
  86. # Walk through all files in docs/modules
  87. for root, _, files in os.walk('docs/modules'):
  88. for file in files:
  89. if file.endswith(('.md', '.mdx')):
  90. filepath = os.path.join(root, file)
  91. current_hash = get_file_hash(filepath)
  92. current_hashes[filepath] = current_hash
  93. # Check if file is new or modified
  94. if (
  95. filepath not in previous_hashes
  96. or previous_hashes[filepath] != current_hash
  97. ):
  98. print(f'Change detected in {filepath}')
  99. for lang in LANGUAGES:
  100. process_file(filepath, lang)
  101. print('all files up to date, saving hashes')
  102. save_file_hashes(current_hashes)
  103. previous_hashes = current_hashes
  104. if __name__ == '__main__':
  105. main()