|
|
@@ -8,71 +8,69 @@ import os
|
|
|
import sys
|
|
|
sys.path.append(os.path.dirname(os.path.dirname(__file__)))
|
|
|
|
|
|
-from sqlmodel import Field, SQLModel,Column, Integer, Sequence, UniqueConstraint
|
|
|
+from sqlmodel import Field, SQLModel,Column, Integer, Sequence, UniqueConstraint
|
|
|
from config import DB_URL,logger
|
|
|
# from db.common import engine
|
|
|
from sqlalchemy.dialects.postgresql import insert
|
|
|
from sqlalchemy.sql.sqltypes import Integer, String, DateTime
|
|
|
from sqlalchemy.sql.schema import Column
|
|
|
from sqlalchemy import UniqueConstraint
|
|
|
+from pydantic import UUID4
|
|
|
+import uuid
|
|
|
from db.base import BaseRepository,DouyinBaseRepository
|
|
|
from db.engine import engine,create_all
|
|
|
|
|
|
|
|
|
|
|
|
class Categories(SQLModel, DouyinBaseRepository, table=True):
    """Table model for a document category owned by one user.

    The (open_id, name) pair is unique, so a user cannot have two
    categories with the same name.
    """

    # Primary key generated with uuid.uuid1(). Annotated as a plain
    # uuid.UUID: pydantic's UUID4 only accepts version-4 values, which
    # contradicts the version-1 default factory.
    id: uuid.UUID = Field(default_factory=uuid.uuid1, primary_key=True)
    # Foreign key to the user table.
    open_id: str = Field(foreign_key="useroauthtoken.open_id", index=True)
    # Category name; indexed to speed up lookups.
    name: str = Field(default="default", index=True)
    # Creation / update time.
    update_time: datetime = Field(default_factory=datetime.now)
    # Composite unique constraint on (open_id, name).
    __table_args__ = (UniqueConstraint('open_id', 'name', name='uq_open_id_ctname'),)
|
|
|
|
|
|
|
|
|
|
|
|
class DocumentCategories(SQLModel, table=True):
    """Link table that associates a document with a category."""

    # Foreign key to documents.id; also this table's primary key.
    # Annotated as uuid.UUID because the referenced ids are produced by
    # uuid.uuid1(), which pydantic's UUID4 type would reject.
    # NOTE(review): with `id` as the sole primary key a document can be
    # linked to at most one category - confirm that this is intended.
    id: uuid.UUID = Field(foreign_key="documents.id", index=True, primary_key=True)
    # Foreign key to categories.id.
    category_id: uuid.UUID = Field(foreign_key="categories.id", index=True)
    __table_args__ = (UniqueConstraint('id', 'category_id', ),)
|
|
|
+
|
|
|
class DocStatus:
    """Integer codes stored in ``Documents.status``."""

    UNPROCESSED = 0  # not processed yet
    COMPLETED = 100  # processing finished
    DISABLED = -1  # disabled
|
|
|
|
|
|
class Documents(SQLModel, table=True):
    """Table model for a stored document belonging to one user.

    The (open_id, path) pair is unique, so the same relative path can
    exist once per user.
    """

    # Primary key generated with uuid.uuid1(). Annotated as a plain
    # uuid.UUID: pydantic's UUID4 only accepts version-4 values, which
    # contradicts the version-1 default factory.
    id: uuid.UUID = Field(default_factory=uuid.uuid1, primary_key=True, index=True)
    # Foreign key to the user table.
    open_id: str = Field(foreign_key="useroauthtoken.open_id", index=True)
    # Relative path of the document.
    path: str = Field(nullable=False, index=True)
    # Document status.
    status: int = Field(nullable=False)
    # Creation / update time.
    update_time: datetime = Field(default_factory=datetime.now)
    __table_args__ = (UniqueConstraint('open_id', 'path', name='uq_documents'),)
|
|
|
|
|
|
class CategoriesRepository(DouyinBaseRepository):
    """Repository bound to the ``Categories`` model."""

    def __init__(self, engine=engine):
        """Bind the repository to ``Categories`` on the given engine."""
        super().__init__(Categories, engine)

    async def aexec_add_or_update_categ(self, open_id, category_name, session):
        """Build a category for ``open_id`` and hand it to the conflict-tolerant insert.

        Args:
            open_id: Owner of the category.
            category_name: Name of the category.
            session: Session passed through to ``aon_conflict_do_nothing``
                (defined in the base class, not visible here).

        Returns:
            The ``Categories`` instance that was built for the insert.
        """
        categ_model = Categories(open_id=open_id, name=category_name)
        await self.aon_conflict_do_nothing(categ_model, session)
        # NOTE(review): Categories.id has a default factory, so this check
        # looks always true unless aon_conflict_do_nothing resets the id
        # on conflict - confirm against the base class.
        if categ_model.id:
            logger.debug(f"{open_id} add new category:{category_name}")
        else:
            logger.debug(f"{open_id} already have same name:{category_name}")
        return categ_model
|
|
|
+
|
|
|
|
|
|
class DocumentCategoriesRepository(DouyinBaseRepository):
    """Repository bound to the ``DocumentCategories`` link model."""

    def __init__(self, engine=engine):
        """Bind the repository to ``DocumentCategories`` on the given engine."""
        super().__init__(DocumentCategories, engine)
|
|
|
|
|
|
-
|
|
|
-
|
|
|
+
|
|
|
class DocumentsRepository(DouyinBaseRepository):
|
|
|
- def __init__(self, open_id, file_path, category_name="default", engine=engine):
|
|
|
- # file_path = {DATA_DIR}/{open_id}/docs/xxx/example_file.pdf
|
|
|
- relative_path = DocumentsRepository.get_relative_path(file_path)
|
|
|
- if relative_path == None:
|
|
|
- return
|
|
|
- self.doc_model = Documents(
|
|
|
- open_id=open_id,
|
|
|
- path=relative_path,
|
|
|
- status=DocStatus.UNPROCESSED,
|
|
|
- )
|
|
|
- self.category_model = Categories(
|
|
|
- open_id=open_id,
|
|
|
- name=category_name
|
|
|
- )
|
|
|
+ def __init__(self, engine=engine):
|
|
|
super().__init__(Documents, engine)
|
|
|
|
|
|
def get_relative_path(full_path):
|
|
|
@@ -83,48 +81,54 @@ class DocumentsRepository(DouyinBaseRepository):
|
|
|
else:
|
|
|
logger.error(f"Can not get rel path:{full_path}")
|
|
|
|
|
|
- async def add_document_with_categories(self):
|
|
|
- document_id = await self.aon_conflict_do_nothing(self.doc_model)
|
|
|
- logger.debug(f"document_id:{document_id}")
|
|
|
- cr = CategoriesRepository()
|
|
|
- category_id = await cr.aon_conflict_do_nothing(self.category_model)
|
|
|
- logger.debug(f"category_id:{category_id}")
|
|
|
- doc_categ_model = DocumentCategories(document_id, category_id)
|
|
|
- dr = DocumentCategoriesRepository()
|
|
|
- dr.aon_conflict_do_nothing(doc_categ_model)
|
|
|
+ # file_path = {DATA_DIR}/{open_id}/docs/xxx/example_file.pdf
|
|
|
+ async def add_document_with_categories(self, open_id, file_path, category_name="default"):
|
|
|
+ async with self.session_factory() as session:
|
|
|
+ doc_model:Documents = await self.aexec_add_or_update_file(open_id, file_path, session)
|
|
|
+
|
|
|
+ cr = CategoriesRepository()
|
|
|
+ category_model = await cr.aexec_add_or_update_categ(open_id, category_name,session)
|
|
|
+ logger.debug(f"category_id:{category_model}")
|
|
|
+
|
|
|
+ if doc_model.id is not None and category_model.id is not None:
|
|
|
+ doc_categ_model = DocumentCategories(id=doc_model.id, category_id=category_model.id)
|
|
|
+ dr = DocumentCategoriesRepository()
|
|
|
+ logger.info(doc_categ_model)
|
|
|
+ await dr.aon_conflict_do_nothing(doc_categ_model, session)
|
|
|
+ await session.commit()
|
|
|
+ else:
|
|
|
+ logger.info("DocumentCategories no change.")
|
|
|
return
|
|
|
- # 添加或更新文档
|
|
|
- await self.add_or_update_document(new_document.model_dump(), "document_id")
|
|
|
-
|
|
|
- # 获取已存在的分类
|
|
|
- categories_repo = CategoriesRepository()
|
|
|
- existing_categories = await categories_repo.get_all_by_ids(category_ids)
|
|
|
- existing_category_ids = {category.category_id for category in existing_categories}
|
|
|
-
|
|
|
- # 添加不存在的分类
|
|
|
- for category_id in set(category_ids) - existing_category_ids:
|
|
|
- new_category = Categories(open_id=new_document.open_id, category_id=category_id, category_name=f"Category_{category_id}") # 假设名称由 ID 生成
|
|
|
- await categories_repo.add([new_category])
|
|
|
-
|
|
|
- # 创建并添加文档分类关联关系
|
|
|
- document_categories_to_add = []
|
|
|
- for category_id in category_ids:
|
|
|
- doc_cat = DocumentCategories(document_id=new_document.document_id, category_id=category_id)
|
|
|
- document_categories_to_add.append(doc_cat)
|
|
|
-
|
|
|
- # 添加文档分类关联关系到数据库
|
|
|
- document_categories_repo = DocumentCategoriesRepository()
|
|
|
- await document_categories_repo.add(document_categories_to_add)
|
|
|
-
|
|
|
+
|
|
|
+ async def aexec_add_or_update_file(self, open_id, file_path, session):
|
|
|
+ relative_path = DocumentsRepository.get_relative_path(file_path)
|
|
|
+ if relative_path == None:
|
|
|
+ return
|
|
|
+ self.instance_model = Documents(
|
|
|
+ open_id=open_id,
|
|
|
+ path=relative_path,
|
|
|
+ status=DocStatus.UNPROCESSED,
|
|
|
+ )
|
|
|
+ # 在同一个 open_id 用户层面上,如果 relative_path 相同,则产生冲突,仅仅更新时间。说明 file_path 同路径下覆盖了新文件
|
|
|
+ # 没有产生冲突,说明不同用户或不同路径下新增了文件
|
|
|
+ document_model:Documents = await self.aon_conflict_do_update(self.instance_model, session)
|
|
|
+ res = self.aget(open_id=open_id, file_path=file_path)
|
|
|
+ logger.info(f"get doc row:{res}")
|
|
|
+ if document_model.id:
|
|
|
+ logger.debug(f"{document_model.open_id} add new file:{document_model.path}")
|
|
|
+ else:
|
|
|
+ logger.debug(f"{document_model.open_id} overwrite file:{document_model.path}")
|
|
|
+ return document_model
|
|
|
+
|
|
|
# 示例使用
|
|
|
async def main():
|
|
|
from db.user import test_add
|
|
|
open_id = await test_add()
|
|
|
# 创建实例
|
|
|
- categories_repo = CategoriesRepository()
|
|
|
- documents_repo = DocumentsRepository(open_id,"/home/user/code/open-douyin/open_id/docs/readme2.md")
|
|
|
- document_categories_repo = DocumentCategoriesRepository()
|
|
|
- await documents_repo.add_document_with_categories()
|
|
|
+ documents_repo = DocumentsRepository()
|
|
|
+ res = await documents_repo.aget(id=1)
|
|
|
+ logger.info(res)
|
|
|
+ # await documents_repo.add_document_with_categories(open_id,"/home/user/code/open-douyin/open_id/docs/readme.md")
|
|
|
# 添加分类
|
|
|
# doc1 = Documents(open_id=open_id, document_name="docs_fn", status="ready", file_path="/path")
|
|
|
# doc2 = Documents(open_id=open_id, document_name="docs_jj", status="ready", file_path="/path")
|