10 mesiacov pred · 5cd051d37c
--- a/function_calling/CONVENTIONS.md
+++ b/function_calling/CONVENTIONS.md
@@ -12,32 +12,8 @@ logger = logging.getLogger('excel_tool')
 
				 
			
 
				 ```
			
 
				 - 传参必须声明数据类型
			
 
				+- 将每个函数定义拆分为最小化的单一任务：一个函数只完成一个任务，不要在同一个函数里执行多个步骤。
			
 
				 
			
 
				 # 本目录下的说明
			
 
				 
			
 
				 本地文件的操作（如读取 Excel 文件、写入翻译结果等），通过 Function Calling 的方式调用这些函数来处理本地文件。
			
 
				-
			
 
				-## 用户需求：
			
 
				-
			
 
				-1.0
			
 
				-- 在搜索词列的右边插入一列，在新一列中翻译成中文
			
 
				-- 在搜索词列点击单元格内容，可以直接跳转到亚马逊链接，网页是搜索词的搜索内容
			
 
				-```python
			
 
				-def create_hyperlink(value, base_url):
			
 
				-    """为给定的值创建亚马逊搜索页面的超链接"""
			
 
				-    return f'=HYPERLINK("{base_url}{quote(value)}&__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99", "{value}")'
			
 
				-```
			
 
				-
			
 
				-
			
 
				-2.0
			
 
				-- 点击品牌相关的列，可以看到该品牌在亚马逊上的所有产品，具体是通过创建搜索链接实现的。
			
 
				-
			
 
				-3.0 
			
 
				-- 在“点击量最高的类别”的所有列中，每一列右边插入新列，翻译成中文
			
 
				-
			
 
				-4.0 
			
 
				-- 点击 ASIN 列的单元格，可以直接跳转到亚马逊链接，链接内容是 https://www.amazon.co.jp/dp/{asin} ，asin 就是单元格内容
			
 
				-
			
 
				-# 要求
			
 
				-
			
 
				-分解成一个个最小化的任务。适合Function Calling 调用，不要假设提到的列名、词语是固定值
			
--- a/function_calling/excel_processor.py
+++ b/function_calling/excel_processor.py
@@ -1,47 +1,74 @@
 
				-from typing import List, Dict
			
 
				-import pandas as pd
			
 
				-from urllib.parse import quote
			
 
				-from mylib.logging_config import setup_logging
			
 
				-import logging
			
 
				-
			
 
				-# Setup logging
			
 
				-setup_logging()
			
 
				-logger = logging.getLogger('excel_tool')
			
 
				-
			
 
				-class ExcelProcessor:
			
 
				-    def __init__(self, file_path: str):
			
 
				-        """Initialize Excel processor with file path"""
			
 
				-        self.file_path = file_path
			
 
				-        self.df = pd.read_excel(file_path)
			
 
				-        logger.info(f"Loaded Excel file: {file_path}")
			
 
				-
			
 
				-    def add_translation_column(self, column_name: str, translations: Dict[str, str]):
			
 
				-        """Add translated column next to specified column"""
			
 
				-        try:
			
 
				-            new_col = f"{column_name}_中文"
			
 
				-            self.df[new_col] = self.df[column_name].map(translations)
			
 
				-            logger.info(f"Added translation column for {column_name}")
			
 
				-        except Exception as e:
			
 
				-            logger.error(f"Error adding translation column: {str(e)}")
			
 
				-            raise
			
 
				-
			
 
				-    def add_hyperlink_column(self, column_name: str, base_url: str):
			
 
				-        """Add hyperlink column for specified column"""
			
 
				-        try:
			
 
				-            new_col = f"{column_name}_链接"
			
 
				-            self.df[new_col] = self.df[column_name].apply(
			
 
				-                lambda x: f'=HYPERLINK("{base_url}{quote(x)}", "{x}")'
			
 
				-            )
			
 
				-            logger.info(f"Added hyperlink column for {column_name}")
			
 
				-        except Exception as e:
			
 
				-            logger.error(f"Error adding hyperlink column: {str(e)}")
			
 
				-            raise
			
 
				-
			
 
				-    def save(self, output_path: str):
			
 
				-        """Save processed Excel file"""
			
 
				+import csv
			
 
				+import sys
			
 
				+import os
			
 
				+
			
 
				+# 添加项目根目录到Python路径
			
 
				+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
			
 
				+
			
 
				+from mylib.read_encoding_cvs import read_csv, detect_encoding
			
 
				+
			
 
				+def _column_letter_to_index(col):
			
 
				+    """将Excel列字母转换为索引（A=0, B=1,...）"""
			
 
				+    index = 0
			
 
				+    for char in col.upper():
			
 
				+        if not 'A' <= char <= 'Z':
			
 
				+            raise ValueError(f"无效的列字母: {col}")
			
 
				+        index = index * 26 + (ord(char) - ord('A') + 1)
			
 
				+    return index - 1
			
 
				+
			
 
				+def _read_csv_file(csv_file):
			
 
				+    """读取CSV文件并返回数据"""
			
 
				+    encoding = detect_encoding(csv_file)
			
 
				+    return read_csv(csv_file, specified_encoding=encoding)
			
 
				+
			
 
				+def _get_column_index(headers, column_name):
			
 
				+    """获取指定列的索引"""
			
 
				+    try:
			
 
				+        # 先尝试作为列字母处理
			
 
				+        ref_index = _column_letter_to_index(column_name)
			
 
				+        if ref_index >= len(headers):
			
 
				+            raise ValueError(f"列索引 {ref_index} 超出范围")
			
 
				+    except ValueError:
			
 
				+        # 如果不是列字母，尝试作为列名处理
			
 
				         try:
			
 
				-            self.df.to_excel(output_path, index=False)
			
 
				-            logger.info(f"Saved processed file to {output_path}")
			
 
				-        except Exception as e:
			
 
				-            logger.error(f"Error saving file: {str(e)}")
			
 
				-            raise
			
 
				+            ref_index = headers.index(column_name)
			
 
				+        except ValueError:
			
 
				+            raise ValueError(f"列 '{column_name}' 不存在")
			
 
				+    return ref_index
			
 
				+
			
 
				+def _insert_new_column(rows, headers, ref_index, new_column_name, position):
			
 
				+    """在指定位置插入新列"""
			
 
				+    insert_index = ref_index + 1 if position == 'right' else ref_index
			
 
				+    headers.insert(insert_index, new_column_name)
			
 
				+    for row in rows[1:]:
			
 
				+        row.insert(insert_index, '')
			
 
				+    return rows
			
 
				+
			
 
				+def _write_csv_file(output_file, rows):
			
 
				+    """将数据写入CSV文件"""
			
 
				+    with open(output_file, 'w', encoding='utf-8', newline='') as file:
			
 
				+        writer = csv.writer(file)
			
 
				+        writer.writerows(rows)
			
 
				+
			
 
				+def insert_column(csv_file, column_name, new_column_name, position='right'):
			
 
				+    """
			
 
				+    在指定列旁边插入空列
			
 
				+    
			
 
				+    Args:
			
 
				+        csv_file (str): CSV文件路径
			
 
				+        column_name (str): 参考列名或Excel列字母（如'A', 'B'）
			
 
				+        new_column_name (str): 新列名
			
 
				+        position (str): 插入位置，'left'或'right'，默认为'right'
			
 
				+    """
			
 
				+    rows = _read_csv_file(csv_file)
			
 
				+    headers = rows[0]
			
 
				+    
			
 
				+    ref_index = _get_column_index(headers, column_name)
			
 
				+    rows = _insert_new_column(rows, headers, ref_index, new_column_name, position)
			
 
				+    
			
 
				+    output_file = '/home/mrh/code/excel_tool/temp/测试_process.csv'
			
 
				+    _write_csv_file(output_file, rows)
			
 
				+
			
 
				+if __name__ == '__main__':
			
 
				+    # 测试代码
			
 
				+    insert_column('/home/mrh/code/excel_tool/temp/测试.csv', 'B', 'New Column')
			
--- a/mylib/read_encoding_cvs.py
+++ b/mylib/read_encoding_cvs.py
@@ -5,8 +5,7 @@ from typing import List, Optional
 
				 import os
			
 
				 from mylib.logging_config import setup_logging
			
 
				 setup_logging()
			
 
				-logger = logging.getLogger(__name__)
			
 
				-logger.info(f"{__file__}")
			
 
				+logger = logging.getLogger("excel_tool" + '.' +  __name__)
			
 
				 
			
 
				 def detect_encoding(file_path: str, sample_size: int = 100000) -> str:
			
 
				     """检测文件编码
			
@@ -46,6 +45,47 @@ def detect_encoding(file_path: str, sample_size: int = 100000) -> str:
 
				         return 'shift_jis'  # 返回日文编码作为默认值
			
 
				 
			
 
				 
			
 
				+def save_csv(data: List[List[str]], file_path: str) -> None:
			
 
				+    """将CSV数据保存为UTF-8编码文件
			
 
				+    
			
 
				+    Args:
			
 
				+        data: 要保存的CSV数据
			
 
				+        file_path: 目标文件路径
			
 
				+    """
			
 
				+    try:
			
 
				+        with open(file_path, 'w', encoding='utf-8', newline='') as f:
			
 
				+            writer = csv.writer(f)
			
 
				+            writer.writerows(data)
			
 
				+        logger.info(f"File saved as UTF-8: {file_path}")
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"Error saving file {file_path}: {e}")
			
 
				+        raise
			
 
				+
			
 
				+def read_with_cp936(file_path: str) -> List[List[str]]:
			
 
				+    """使用cp936编码读取CSV文件
			
 
				+    
			
 
				+    Args:
			
 
				+        file_path: 文件路径
			
 
				+        
			
 
				+    Returns:
			
 
				+        包含CSV数据的二维列表
			
 
				+    """
			
 
				+    try:
			
 
				+        logger.info(f"Reading file with cp936 encoding: {file_path}")
			
 
				+        with open(file_path, 'r', encoding='cp936', errors='replace') as f:
			
 
				+            reader = csv.reader(f)
			
 
				+            data = list(reader)
			
 
				+            
			
 
				+            # 保存转换后的文件
			
 
				+            output_file_path = file_path + '.utf8.csv'
			
 
				+            save_csv(data, output_file_path)
			
 
				+            
			
 
				+            return data
			
 
				+            
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"Error reading file with cp936 encoding: {e}")
			
 
				+        return []
			
 
				+
			
 
				 def read_csv(file_path: str, specified_encoding: Optional[str] = None) -> List[List[str]]:
			
 
				     """读取CSV文件并转换为utf-8编码
			
 
				     
			
@@ -56,9 +96,12 @@ def read_csv(file_path: str, specified_encoding: Optional[str] = None) -> List[L
 
				     Returns:
			
 
				         包含CSV数据的二维列表
			
 
				     """
			
 
				+    # 如果指定了cp936编码，直接使用专用函数
			
 
				+    if specified_encoding == 'cp936':
			
 
				+        return read_with_cp936(file_path)
			
 
				+        
			
 
				     # 常见编码列表，优先尝试日文编码
			
 
				     encodings_to_try = [
			
 
				-        'cp936',
			
 
				         'shift_jis',  # 日文常用编码
			
 
				         'cp932',      # Windows日文编码
			
 
				         'euc-jp',     # 日文EUC编码
			
@@ -101,13 +144,7 @@ def read_csv(file_path: str, specified_encoding: Optional[str] = None) -> List[L
 
				                         logger.warning(f"Japanese characters not detected with {encoding}, trying next encoding")
			
 
				                         continue
			
 
				                 
			
 
				-                # 将数据写入新的utf-8编码文件
			
 
				-                output_file_path = file_path + '.utf8.csv'
			
 
				-                with open(output_file_path, 'w', encoding='utf-8', newline='') as f_out:
			
 
				-                    writer = csv.writer(f_out)
			
 
				-                    writer.writerows(data)
			
 
				                 
			
 
				-                logger.info(f"File saved as UTF-8: {output_file_path}")
			
 
				                 return data
			
 
				                 
			
 
				         except UnicodeDecodeError as e:
			
@@ -121,11 +158,12 @@ def read_csv(file_path: str, specified_encoding: Optional[str] = None) -> List[L
 
				     return []  # 返回空列表而不是退出
			
 
				 
			
 
				 def main():
			
 
				-    from logging_config import setup_logging
			
 
				-    setup_logging()
			
 
				-    logging.getLogger(__name__).setLevel(logging.DEBUG)
			
 
				     file_path = "/home/mrh/code/excel_tool/temp/测试.csv"
			
 
				-    read_csv(file_path, 'cp936')
			
 
				-
			
 
				+    data =read_csv(file_path, 'cp936')
			
 
				+    # 保存转换后的文件
			
 
				+    # output_file_path = file_path + '.utf8.csv'
			
 
				+    # save_csv(data, output_file_path)
			
 
				+    logger.info(data)
			
 
				+    
			
 
				 if __name__ == "__main__":
			
 
				     main()
			
--- a/user_requiement_doc.md
+++ b/user_requiement_doc.md
@@ -15,6 +15,7 @@
 
				 | 429          | 除草剤   | 住友化学園芸＼ＳＵＭＩＴＯＭＯ　ＣＨＥＭＩＣＡＬ　ＧＡＲＤＥＮ　ＰＲＯＤＵＣＴＳ | Iris Ohyama         | CLEAN HOUSE         | Home Improvement    | Biss                | Lawn and Garden     | B007RLHLE8                    | アイリスオーヤマ(IRIS OHYAMA) 除草剤 速効除草剤 4L 液体タイプ そのまま使える 大容量 お徳用 SJS-4L | 18.84                         | 18.44                         | B076BMMRJG                    | 住友化学園芸 除草剤 草退治メガロングシャワー4L 強力 持続 最大散布400㎡ 液体 | 6.97                          | 5.05                          | B07JVRXMWJ                    | アース製薬(Earth Chemical) アースガーデン 除草剤 アースカマイラズ 草消滅 [4.5L] ガーデニング ジョウロヘッド(シャワー タイプ) 家庭用 園芸 雑草 除去 除草剤 | 5.83                          | 5.06                          | 2024/9/30  |
			
 
				 | 625          | 草刈り機 | Makita              | ICEHAIL             | YAMAZEN             | Lawn and Garden     | Home Improvement    | Automotive          | B0CXPNC57G                    | ICEHAIL 電動 草刈り機 充電式 草刈機 低騒音 20000rpm 1.2kg軽量 コードレス セキュリティ設計 伸縮 角度調整 小型ハンディ保管が楽 男と女性高齢者初心者御勧め 2000mAh大容量バッテリー2個 多地形4種類替刃枝切り 芝生 畑 雑草 植木 菜園 充電式刈払機 農業 家庭用芝刈り機 園芸用機器 掃除が楽 日本語説 | 4.67                          | 2.42                          | B08KDYFCWW                    | ONE STEP 充電式草刈機 草刈り機 コードレス 2024最新型 ナイロンカッター 伸縮 角度調整 枝切り 芝生 畑 雑草 植木 刈払機 軽量 バッテリー付き (草刈機+バッテリー2個) | 3.65                          | 3.12                          | B0B5DBWMD9                    | マキタ(Makita) 260mm充電式草刈機 18V バッテリ・充電器別売 MUR193DZ | 3.45                          | 0.54                          | 2024/9/30  |
			
 
				 
			
 
				+
			
 
				 ## 用户需求：
			
 
				 
			
 
				 1.0
			
@@ -26,7 +27,6 @@ def create_hyperlink(value, base_url):
 
				     return f'=HYPERLINK("{base_url}{quote(value)}&__mk_zh_CN=%E4%BA%9A%E9%A9%AC%E9%80%8A%E7%BD%91%E7%AB%99", "{value}")'
			
 
				 ```
			
 
				 
			
 
				-
			
 
				 2.0
			
 
				 - 点击品牌相关的列，可以看到该品牌在亚马逊上的所有产品，具体是通过创建搜索链接实现的。
			
 
				 
			
@@ -34,4 +34,8 @@ def create_hyperlink(value, base_url):
 
				 - 在“点击量最高的类别”的所有列中，每一列右边插入新列，翻译成中文
			
 
				 
			
 
				 4.0 
			
 
				-- 点击 ASIN 列的单元格，可以直接跳转到亚马逊链接，链接内容是 https://www.amazon.co.jp/dp/{asin} ，asin 就是单元格内容
			
 
				+- 点击 ASIN 列的单元格，可以直接跳转到亚马逊链接，链接内容是 https://www.amazon.co.jp/dp/{asin} ，asin 就是单元格内容
			
 
				+
			
 
				+# 要求
			
 
				+
			
 
				+分解成一个个最小化的任务。适合Function Calling 调用，不要假设提到的列名、词语是固定值