| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556 |
- import os
- from markitdown import MarkItDown
def parse_xlsx_from_url(url):
    """Convert an XLSX document to Markdown text with the markitdown library.

    Args:
        url (str): URL or local path of the XLSX file to convert.

    Returns:
        str | None: The ``text_content`` of the conversion result, or
        ``None`` when any exception is raised along the way (the error
        message is printed instead of being propagated).
    """
    try:
        # Build a converter with plugins switched off, then hand the
        # source straight to it and return the Markdown text it produced.
        converter = MarkItDown(enable_plugins=False)
        return converter.convert(url).text_content
    except Exception as err:
        # Best-effort contract: report the failure and signal it to the
        # caller with None rather than raising.
        print(f"解析 XLSX 文件时发生错误: {err}")
        return None
def test_parse_xlsx():
    """Manual smoke test: convert one sample XLSX file and print the outcome.

    Sample files listed for this test:
    1. http://s3.vs1.lan/public/amazone/copywriting_production/product/202508/1P镊子压刀.xlsx
    2. http://s3.vs1.lan/public/amazone/copywriting_production/product/202508/3P一体不锈钢迷你园艺铲.xlsx
    3. http://s3.vs1.lan/public/amazone/copywriting_production/product/202508/磁吸固定夹.xlsx
    4. http://s3.vs1.lan/public/amazone/copywriting_production/product/202508/锯齿固定夹.xlsx
    5. http://s3.vs1.lan/public/amazone/copywriting_production/product/202508/魔术贴金属扣.xlsx
    6. http://s3.vs1.lan/public/amazone/copywriting_production/product/202508/黑白轧带.xlsx
    """
    # Sample source; replace with the actual XLSX URL, or with a local file
    # path if a local test file is available.
    source = "http://s3.vs1.lan/public/amazone/copywriting_production/product/202508/1P镊子压刀.xlsx"

    print(f"正在解析 URL: {source}")
    markdown = parse_xlsx_from_url(source)

    # A falsy result (None on error, or empty text) is reported as a failure.
    if not markdown:
        print("解析失败")
        return

    print("解析成功!内容如下:")
    print(markdown)
# Script entry point: run the smoke test only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    test_parse_xlsx()
|