há 1 ano atrás · 9218476fd9
--- a/.env
+++ b/.env
@@ -1,3 +1,4 @@
 
				 OPENAI_API_KEY='sk-NscqaCD1PfVm7soEF3C3E6297bE14d7fB595Be8f17F39aFf'
			
 
				 OPENAI_API_BASE='https://aiapi.magong.site/v1'
			
 
				-LLM_MODEL="deepseek-chat"
			
 
				+LLM_MODEL="deepseek-chat"
			
 
				+DEBUG=True
			
--- a/brand_add_url_link.py
+++ b/brand_add_url_link.py
@@ -3,12 +3,12 @@ from urllib.parse import quote
 
				 from pathlib import Path
			
 
				 
			
 
				 def create_hyperlink(value, base_url):
			
 
				-    """为给定的值创建亚马逊搜索页面的超链接，并添加蓝色下划线样式"""
			
 
				-    return f'=HYPERLINK("{base_url}{quote(value)}&__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99", "[蓝色下划线] {value}")'
			
 
				+    """为给定的值创建亚马逊搜索页面的超链接，"""
			
 
				+    return f'=HYPERLINK("{base_url}{quote(value)}&__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99", "{value}")'
			
 
				 
			
 
				 def create_asin_link(asin):
			
 
				-    """为ASIN创建产品详情页链接，并添加蓝色下划线样式"""
			
 
				-    return f'=HYPERLINK("https://www.amazon.co.jp/dp/{asin}", "[蓝色下划线] {asin}")'
			
 
				+    """为ASIN创建产品详情页链接，"""
			
 
				+    return f'=HYPERLINK("https://www.amazon.co.jp/dp/{asin}", "{asin}")'
			
 
				 
			
 
				 def process_row(row, brand_indices, asin_indices, base_url):
			
 
				     """为指定索引的单元格添加超链接"""
			
--- a/mylib/pdfzh_translator.py
+++ b/mylib/pdfzh_translator.py
@@ -56,7 +56,7 @@ class GoogleTranslator(BaseTranslator):
 
				 
			
 
				 
			
 
				 class OpenAITranslator(BaseTranslator):
			
 
				-    def __init__(self, service, lang_out, lang_in, model, max_tokens=2000):
			
 
				+    def __init__(self, service='opeanai', lang_out='zh-CN', lang_in='auto', model=os.getenv('LLM_MODEL'), max_tokens=2000):
			
 
				         lang_out = "zh-CN" if lang_out == "auto" else lang_out
			
 
				         lang_in = "en" if lang_in == "auto" else lang_in
			
 
				         super().__init__(service, lang_out, lang_in, model)
			
@@ -75,7 +75,7 @@ class OpenAITranslator(BaseTranslator):
 
				 
			
 
				     def _single_translate(self, text) -> str:
			
 
				         response = self.client.chat.completions.create(
			
 
				-            model=os.getenv('LLM_MODEL', self.model),
			
 
				+            model=self.model,
			
 
				             **self.options,
			
 
				             messages=[
			
 
				                 {
			
@@ -155,3 +155,11 @@ if __name__ == "__main__":
 
				     print("\nBatch translation results:")
			
 
				     for original, translated in zip(batch_texts, translated_batch):
			
 
				         print(f"{original} -> {translated}")
			
 
				+
			
 
				+'''
			
 
				+translator = OpenAITranslator("openai", "zh-CN", "en", "openai/deepseek-chat")                                           
			
 
				+ # 单个翻译                                                                                                        
			
 
				+ result = translator.translate("Hello world")                                                                      
			
 
				+ # 批量翻译                                                                                                        
			
 
				+ results = translator.translate(["apple", "banana", "orange"])     
			
 
				+'''
			
--- a/mylib/read_encoding_cvs.py
+++ b/mylib/read_encoding_cvs.py
@@ -0,0 +1,42 @@
 
				+import csv
			
 
				+import csv
			
 
				+import chardet
			
 
				+import sys
			
 
				+import logging
			
 
				+
			
 
				+logger = logging.getLogger(__name__)
			
 
				+
			
 
				+
			
 
				+def detect_encoding(file_path):
			
 
				+    try:
			
 
				+        with open(file_path, 'rb') as f:
			
 
				+            raw_data = f.read()
			
 
				+            result = chardet.detect(raw_data)
			
 
				+            return result['encoding']
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"Error detecting encoding for {file_path}: {e}")
			
 
				+        sys.exit(1)
			
 
				+
			
 
				+
			
 
				+
			
 
				+def read_csv(file_path, to_encode=''):
			
 
				+    encodings_to_try = ['utf-8-sig', 'gb18030', 'shift_jis', 'euc-jp']
			
 
				+    detected_encoding = detect_encoding(file_path)
			
 
				+    logger.info(f"Detected encoding: {detected_encoding}")
			
 
				+    
			
 
				+    if detected_encoding:
			
 
				+        encodings_to_try.insert(0, detected_encoding)
			
 
				+    
			
 
				+    for encoding in encodings_to_try:
			
 
				+        try:
			
 
				+            with open(file_path, 'r', encoding=encoding) as f:
			
 
				+                reader = csv.reader(f)
			
 
				+                return list(reader)
			
 
				+        except UnicodeDecodeError:
			
 
				+            continue
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"Error with encoding {encoding}: {e}")
			
 
				+            continue
			
 
				+    
			
 
				+    logger.error("Failed to read file with all attempted encodings")
			
 
				+    sys.exit(1)
			
--- a/process_data.py
+++ b/process_data.py
@@ -11,37 +11,6 @@ from mylib.logging_config import setup_logging
 
				 setup_logging()
			
 
				 logger = logging.getLogger(__name__)
			
 
				 
			
 
				-def detect_encoding(file_path):
			
 
				-    try:
			
 
				-        with open(file_path, 'rb') as f:
			
 
				-            raw_data = f.read()
			
 
				-            result = chardet.detect(raw_data)
			
 
				-            return result['encoding']
			
 
				-    except Exception as e:
			
 
				-        logger.error(f"Error detecting encoding for {file_path}: {e}")
			
 
				-        sys.exit(1)
			
 
				-
			
 
				-def read_csv(file_path):
			
 
				-    encodings_to_try = ['utf-8-sig', 'gb18030', 'shift_jis', 'euc-jp']
			
 
				-    detected_encoding = detect_encoding(file_path)
			
 
				-    logger.info(f"Detected encoding: {detected_encoding}")
			
 
				-    
			
 
				-    if detected_encoding:
			
 
				-        encodings_to_try.insert(0, detected_encoding)
			
 
				-    
			
 
				-    for encoding in encodings_to_try:
			
 
				-        try:
			
 
				-            with open(file_path, 'r', encoding=encoding) as f:
			
 
				-                reader = csv.reader(f)
			
 
				-                return list(reader)
			
 
				-        except UnicodeDecodeError:
			
 
				-            continue
			
 
				-        except Exception as e:
			
 
				-            logger.error(f"Error with encoding {encoding}: {e}")
			
 
				-            continue
			
 
				-    
			
 
				-    logger.error("Failed to read file with all attempted encodings")
			
 
				-    sys.exit(1)
			
 
				 
			
 
				 def add_brand_asin_links(data, brand_indices, asin_indices):
			
 
				     """为品牌和ASIN列添加链接"""
			
--- a/tests/add_col_translate.py
+++ b/tests/add_col_translate.py
@@ -0,0 +1,13 @@
 
				+from pathlib import Path
			
 
				+from mylib.translate_utils import process_batch_translations
			
 
				+from mylib.read_encoding_cvs import read_csv
			
 
				+def main():
			
 
				+    output_dir = Path('temp')
			
 
				+    input_file = output_dir/"测试.csv"
			
 
				+    output_file = output_dir/"processed_测试.csv"
			
 
				+    data = read_csv(input_file)
			
 
				+    process_batch_translations(data, search_term_index, category_indices) 
			
 
				+    
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    main()
			
--- a/user_requiement_doc.md
+++ b/user_requiement_doc.md
@@ -26,7 +26,7 @@
 
				 brand_add_url_link.py 已经实现 `create_hyperlink`
			
 
				 
			
 
				 3.0 
			
 
				-- 在所有类别的列中，右边插入新列，翻译成中文
			
 
				+- 在“点击量最高的类别”的所有列中，每一列右边插入新列，翻译成中文
			
 
				 
			
 
				 4.0 
			
 
				 - 点击 ASIN 列的单元格，可以直接跳转到亚马逊链接，链接内容是 https://www.amazon.co.jp/dp/{asin} ，asin 就是单元格内容