Преглед изворног кода

refactor: move database logic to DatabaseManager and add duplicate checking

mrh (aider) пре 10 месеци
родитељ
комит
4ce06ce2c8
2 измењена фајла са 15 додавања и 5 уклањања
  1. 14 0
      database/sql_model.py
  2. 1 5
      mylib/search_manager.py

+ 14 - 0
database/sql_model.py

@@ -23,8 +23,22 @@ class DatabaseManager:
     def __init__(self):
         self.engine = engine
         
+    def get_existing_result(self, keyword: str, start: int) -> SearchResult | None:
+        """Return the first stored SearchResult whose keyword and start both match, or None."""
+        with Session(self.engine) as session:
+            return session.exec(
+                select(SearchResult)
+                .where(SearchResult.keyword == keyword)
+                .where(SearchResult.start == start)
+            ).first()
+            
     def save_search_result(self, keyword: str, start: int, url: str, html_path: str) -> SearchResult:
         """保存搜索结果到数据库"""
+        # 检查是否已存在
+        existing = self.get_existing_result(keyword, start)
+        if existing:
+            return existing
+            
         with Session(self.engine) as session:
             result = SearchResult(
                 keyword=keyword,

+ 1 - 5
mylib/search_manager.py

@@ -34,15 +34,13 @@ class SearchManager:
         html_path = self.save_page(keyword, start)
         
         # 保存到数据库
-        restult = self.db_manager.save_search_result(
+        return self.db_manager.save_search_result(
             keyword=keyword,
             start=start,
             url=url,
             html_path=str(html_path)
         )
         
-        return html_path
-        
     async def next_page(self, keyword: str, current_start: int) -> list[str]:
         """翻到下一页并返回结果链接
         
@@ -63,8 +61,6 @@ class SearchManager:
         
         save_path = save_dir / f"{start}.html"
         save_to_file(self.page.html, save_path)
-        
-        # 保存到数据库
         return save_path
         
     async def _process_page(self, url: str) -> CrawlResult: