t_markitdown_parse_url_task.py 1.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. import sys
  2. import os
  3. sys.path.append(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
  4. from src.flow_task.crawl_asin_flow import parse_url_to_markdown_task
  5. from prefect import flow
  6. def test_parse_url_task():
  7. """测试parse_url_to_markdown_task函数"""
  8. # 测试URL列表(来自t_markitdown_xlsx.py中的示例)
  9. test_urls = [
  10. "http://s3.vs1.lan/public/amazone/copywriting_production/product/202508/1P镊子压刀.xlsx",
  11. "http://s3.vs1.lan/public/amazone/copywriting_production/product/202508/3P一体不锈钢迷你园艺铲.xlsx",
  12. "http://s3.vs1.lan/public/amazone/copywriting_production/product/202508/磁吸固定夹.xlsx",
  13. "http://s3.vs1.lan/public/amazone/copywriting_production/product/202508/锯齿固定夹.xlsx",
  14. "http://s3.vs1.lan/public/amazone/copywriting_production/product/202508/魔术贴金属扣.xlsx",
  15. "http://s3.vs1.lan/public/amazone/copywriting_production/product/202508/黑白轧带.xlsx"
  16. ]
  17. # 选择第一个URL进行测试
  18. test_url = test_urls[0]
  19. print(f"正在测试URL: {test_url}")
  20. try:
  21. # 调用task
  22. result = parse_url_to_markdown_task.with_options(refresh_cache=True)(test_url)
  23. if result:
  24. print("解析成功!内容如下:")
  25. print(result)
  26. print("\n" + "="*50 + "\n")
  27. else:
  28. print("解析失败,返回结果为空")
  29. except Exception as e:
  30. print(f"测试过程中发生错误: {e}")
  31. import traceback
  32. traceback.print_exc()
@flow(name="测试URL解析任务")
def test_flow():
    """Flow wrapper that runs ``test_parse_url_task`` once.

    Takes no parameters and returns nothing; the body only delegates to
    ``test_parse_url_task``, which does its own printing and error reporting.
    """
    # The flow name is set by the decorator above; no other work happens here.
    test_parse_url_task()
# Script entry point: announce the run, execute the check once, then report
# completion.
if __name__ == "__main__":
    print("开始测试parse_url_to_markdown_task...")
    # NOTE(review): this calls test_parse_url_task() directly rather than
    # through test_flow(), so the @flow wrapper defined in this file is never
    # exercised from here — confirm that this is intended.
    test_parse_url_task()
    # test_parse_url_task catches Exception itself, so this line is also
    # reached after a reported error.
    print("测试完成")