# 3.py
  1. import pandas as pd
# File locations and header-row position used by this script.
INPUT_FILE_PATH = 'C:\\Users\\74262\\Desktop\\excel\\18.JP家具.csv' # Replace with your actual file path
OUTPUT_FILE_PATH = 'C:\\Users\\74262\\Desktop\\excel\\18.JP家具_processed.csv' # Replace with your desired output file path
# Zero-based index of the row used as the column header when loading.
# NOTE(review): the value 1 selects the SECOND line of the file; the original
# comment described this as "the first row" — confirm which one is intended.
HEADER_ROW = 1
  6. def load_data(file_path, header_row):
  7. """加载数据表,并根据指定行作为标题"""
  8. try:
  9. if file_path.endswith('.csv'):
  10. df = pd.read_csv(file_path, header=header_row)
  11. elif file_path.endswith(('.xls', '.xlsx')):
  12. df = pd.read_excel(file_path, header=header_row)
  13. else:
  14. raise ValueError("Unsupported file format.")
  15. return df
  16. except Exception as e:
  17. print(f"Failed to load data from {file_path}. Error: {e}")
  18. return None
  19. def find_keyword_columns(df, keyword):
  20. """查找包含关键词的列"""
  21. keyword_columns = [col for col in df.columns if keyword.lower() in str(col).lower()]
  22. return keyword_columns
  23. def mock_translate(text):
  24. """模拟翻译函数,返回测试文本"""
  25. return f"{text} (translated)"
  26. def translate_column(df, column_name):
  27. """翻译指定列的内容"""
  28. df[column_name] = df[column_name].apply(lambda x: mock_translate(x) if isinstance(x, str) else x)
  29. return df
  30. def process_and_save(df, output_file_path):
  31. """处理并保存数据到新文件"""
  32. try:
  33. if output_file_path.endswith('.csv'):
  34. df.to_csv(output_file_path, index=False)
  35. elif output_file_path.endswith(('.xls', '.xlsx')):
  36. df.to_excel(output_file_path, index=False)
  37. else:
  38. raise ValueError("Unsupported file format for saving.")
  39. print(f"Translated table saved to {output_file_path}")
  40. except Exception as e:
  41. print(f"Failed to save data to {output_file_path}. Error: {e}")
  42. def main():
  43. # 加载数据
  44. df = load_data(INPUT_FILE_PATH, HEADER_ROW)
  45. if df is None:
  46. return
  47. # 查找包含关键词的列
  48. keyword = "类别" # 替换为实际的关键词
  49. keyword_columns = find_keyword_columns(df, keyword)
  50. if not keyword_columns:
  51. print("No columns found with the specified keyword.")
  52. return
  53. print(f"Found columns: {keyword_columns}")
  54. # 翻译找到的列
  55. for column in keyword_columns:
  56. print(f"Translating column: {column}")
  57. df = translate_column(df, column)
  58. # 保存处理后的数据到新的文件
  59. process_and_save(df, OUTPUT_FILE_PATH)
  60. if __name__ == "__main__":
  61. main()