| 123456789101112131415161718192021222324252627282930 |
- from lxml import html # 使用 lxml.html 模块
# Load the saved search-results page that the checks below inspect.
save_to_html_path = r'K:\code\upwork\zhang_crawl_bio\output\Acalypha+malabarica+essential+oil\40.html'
with open(save_to_html_path, encoding='utf-8', mode='r') as f:
    # Read the whole document at once; the helpers take the full HTML string.
    html_content = f.read()
def is_search_result_empty(html_content: str) -> bool:
    """Return True when the page's ``id="search"`` element has no children.

    The result is also True when the page has no ``id="search"`` element at
    all, because the XPath then matches nothing.

    Args:
        html_content: Full HTML source of the page to inspect.

    Returns:
        True if no child elements are found under ``id="search"`` (or the
        document is blank), False otherwise.
    """
    # lxml refuses to parse an empty or whitespace-only document, so a blank
    # page is reported as "no results" instead of raising from the parser.
    if not html_content or not html_content.strip():
        return True

    tree = html.fromstring(html_content)
    # Direct element children of whichever node carries id="search".
    search_children = tree.xpath('//*[@id="search"]/*')
    return not search_children
def has_rso_element(html_content: str) -> bool:
    """Return True when the page contains an element with ``id="rso"``.

    Args:
        html_content: Full HTML source of the page to inspect.

    Returns:
        True if at least one element with ``id="rso"`` exists, False
        otherwise (including when the document is blank).
    """
    # lxml refuses to parse an empty or whitespace-only document; a blank
    # page cannot contain the element, so answer without invoking the parser.
    if not html_content or not html_content.strip():
        return False

    tree = html.fromstring(html_content)
    return bool(tree.xpath('//*[@id="rso"]'))
def main():
    """Print whether the loaded page's search container is empty."""
    is_empty = is_search_result_empty(html_content)
    print("Is search result empty:", is_empty)


if __name__ == "__main__":
    # Run the check only when executed as a script, not when imported.
    main()
|