|
|
@@ -34,13 +34,15 @@ class SearchManager:
|
|
|
html_path = self.save_page(keyword, start)
|
|
|
|
|
|
# Save to the database
|
|
|
- return self.db_manager.save_search_result(
|
|
|
+ restult = self.db_manager.save_search_result(
|
|
|
keyword=keyword,
|
|
|
start=start,
|
|
|
url=url,
|
|
|
html_path=str(html_path)
|
|
|
)
|
|
|
|
|
|
+ return html_path
|
|
|
+
|
|
|
async def next_page(self, keyword: str, current_start: int) -> list[str]:
|
|
|
"""翻到下一页并返回结果链接
|
|
|
|
|
|
@@ -61,6 +63,8 @@ class SearchManager:
|
|
|
|
|
|
save_path = save_dir / f"{start}.html"
|
|
|
save_to_file(self.page.html, save_path)
|
|
|
+
|
|
|
+ # Save to the database (NOTE(review): no database write follows here; only save_path is returned)
|
|
|
return save_path
|
|
|
|
|
|
async def _process_page(self, url: str) -> CrawlResult:
|
|
|
@@ -78,18 +82,6 @@ class SearchManager:
|
|
|
search_elements = tree.xpath('//*[@id="search"]/*')
|
|
|
return len(search_elements) == 0
|
|
|
|
|
|
- def take_screenshot(self, save_path: Path) -> Path:
|
|
|
- """截图当前页面"""
|
|
|
- return self.page.get_screenshot(save_path)
|
|
|
-
|
|
|
- def check_cache(self, file_path: Path) -> bool:
|
|
|
- """检查缓存文件是否存在"""
|
|
|
- return file_path.exists()
|
|
|
-
|
|
|
- def load_from_cache(self, file_path: Path):
|
|
|
- """从缓存加载页面"""
|
|
|
- self.page.get(f"file://{file_path}")
|
|
|
-
|
|
|
def go_to_next_page(self) -> bool:
|
|
|
"""跳转到下一页"""
|
|
|
next_button = self.page.ele('#pnnext', timeout=1)
|