# xpath_search.py (1.0 KB)

  from lxml import html  # lxml.html provides the HTML parser used below

  # Saved search-results page that the checks in this script run against.
  save_to_html_path = r'K:\code\upwork\zhang_crawl_bio\output\Acalypha+malabarica+essential+oil\40.html'

  # Read the whole page into memory once, decoded as UTF-8.
  with open(save_to_html_path, encoding='utf-8') as f:
      html_content = f.read()
  def is_search_result_empty(html_content: str) -> bool:
      """Report whether the page's ``id="search"`` container has no children.

      Args:
          html_content: Raw HTML text of the page to inspect.

      Returns:
          True when no element with ``id="search"`` has any child element.
          That includes a page with no such element at all, and blank input.
          False as soon as at least one child element is found.
      """
      # lxml.html.fromstring raises ParserError ("Document is empty") on blank
      # input. A blank page cannot hold results, so answer without parsing.
      if not html_content or not html_content.strip():
          return True

      tree = html.fromstring(html_content)
      # The trailing "/*" selects the children of #search, not #search itself,
      # so an empty match list means "no results".
      return not tree.xpath('//*[@id="search"]/*')
  def has_rso_element(html_content: str) -> bool:
      """Report whether the page contains an element with ``id="rso"``.

      Args:
          html_content: Raw HTML text of the page to inspect.

      Returns:
          True when at least one element with ``id="rso"`` exists anywhere in
          the document; False otherwise, including for blank input.
      """
      # lxml.html.fromstring raises ParserError ("Document is empty") on blank
      # input. A blank page has no elements, so answer without parsing.
      if not html_content or not html_content.strip():
          return False

      tree = html.fromstring(html_content)
      return bool(tree.xpath('//*[@id="rso"]'))
  def main():
      """Print whether the loaded page's search-result container is empty.

      Reads the module-level ``html_content``. ``has_rso_element`` can be
      printed the same way when debugging.
      """
      is_empty = is_search_result_empty(html_content)
      print("Is search result empty:", is_empty)
  # Run the check only when executed as a script, not when imported.
  if __name__ == "__main__":
      main()