comment.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. import asyncio
  2. import hashlib
  3. import json
  4. from typing import List
  5. import os
  6. import sys
  7. sys.path.append(os.path.dirname(os.path.dirname(__file__)))
  8. from db.comment import CommentRepository
  9. from db.docs import CategoriesRepository,Categories
  10. from db.user_oauth import UserOAuthRepository,UserOAuthToken
  11. from config import logger,TEMP_DIR,MNT_DOUYIN_DATA
  12. from grpc_m.send_data_to_vector import langchain_chat,simarity_search
  13. import douyin.comment_reply
  14. from fastapi import FastAPI,APIRouter, File, HTTPException
  15. from db.comment import CommentContent,Events,EventRepository
  16. from db.user_info import UserInfo,UserInfoRepository
  17. from db.docs import DocumentsRepository,Documents
  18. from db.user import User,UserRepo,engine
  19. from db.video_data import VideoData,VideoItemDocRepo
  20. from grpc_m.client import get_doc_vector,save_doc_vector
  21. from sqlmodel import Field, SQLModel,Relationship,create_engine,Session,select,func,Column
  22. from tool.video_get import download_video_by_item_id,get_video_download_urls,download_video
  23. from tool.exec_asr_client import run_asr_client
  24. from enum import Enum
  25. from tool.job_video_asr import video_item_to_vector
  26. async def get_video_promt(user:User, event_model:Events):
  27. video_summarize = ""
  28. content = ""
  29. video_item = VideoItemDocRepo().get_or_create(user, event_model.content.reply_to_item_id)
  30. # 该视频没有提取文案和向量计算
  31. if not video_item.doc or video_item.doc.status != 100:
  32. # video_to_txt_path = os.path.join(MNT_DOUYIN_DATA, user.open_id, "docs", "_video", f"{event_model.id}.txt")
  33. # video_item = VideoItemDocRepo().add_vedio_item_doc(user, event_model.content.reply_to_item_id, video_to_txt_path)
  34. asyncio.create_task(video_item_to_vector(video_item))
  35. return ""
  36. else:
  37. if video_item.doc.summarize:
  38. video_summarize = f"<summarize>{video_item.doc.summarize}<summarize/>"
  39. doc_query_res = await get_doc_vector(video_item.doc.id, event_model.content.content)
  40. content = ""
  41. chunks = ""
  42. for item in doc_query_res:
  43. chunk_data = item.chunk
  44. metadata_dict = dict(item.metadata)
  45. score_value = item.score
  46. uuid_value = item.uuid
  47. chunks += f'<chunks score="{item.score}">{item.chunk}<chunks/>'
  48. if chunks:
  49. content = f'<content>{chunks}<content/>'
  50. video_promt = f"""<video relpath="{video_item.doc.path}">{video_summarize}{content}<video/>"""
  51. logger.info(f"video_promt: {video_promt}")
  52. return video_promt
  53. def gen_prompt(nickname, content_models:List[CommentContent], video_promt):
  54. role = """
  55. 你是一个抖音视频的创作者的客服助理,你的昵称是'{nick_name}'。你发布了一个视频,<vedio_content> 是视频的部分文案片段。
  56. <chat> 是用户在视频评论、或用户各自的讨论。你需要回答最后一条 chat 信息。""".format(nick_name=nickname)
  57. logger.info(f"{role}")
  58. prompt = """
  59. {role}
  60. <chat>
  61. {chat}
  62. <chat/>
  63. <docs>
  64. {docs}
  65. <docs/>
  66. """
  67. chat_record = ''
  68. for model in content_models:
  69. chat_record += f"{model.nick_name}: {model.content}\n"
  70. return prompt.format(chat=chat_record, role=role, docs = "docs", vedio_content="none")
  71. async def chat_to_langchain(event_model:Events, comment_model:CommentContent, user_model:User):
  72. oauth_model:UserOAuthToken = user_model.oauth
  73. # 递归查找对话
  74. comment_replies = CommentRepository().get_comment_and_replies(comment_model.comment_id)
  75. query = comment_replies[-1].content
  76. video_promt = await get_video_promt(user_model, event_model)
  77. prompt = gen_prompt(user_model.info.nickname, comment_replies, video_promt)
  78. logger.info(f"query:{query} prompt: {prompt}")
  79. # langchain_res = await langchain_chat(str("4ff71182-5c43-497f-ba16-5b3ba252e478"), prompt)
  80. langchain_res = "这是一个示例回复"
  81. if not langchain_res:
  82. logger.error(f"langchain_chat {langchain_res} ")
  83. return
  84. response = await douyin.comment_reply.reply_to_comment(oauth_model.open_id,oauth_model.access_token, content=langchain_res, comment_id=comment_model.comment_id, item_id=comment_model.reply_to_item_id)
  85. if not response.get('data').get('error_code'):
  86. logger.info(f"回复评论成功: {langchain_res}")
  87. else:
  88. # 一般是秘钥过期、参数错误。还有一种特殊情况,两个账号都授权了思维链,一个账号在另一个账号下是 event_model.to_user_id 恰好授权了思维链,但是他是在别的授权
  89. logger.error(f"回复评论失败: {response}")
  90. # async def save_video_item(open_id, item_id):
  91. # hash_object = hashlib.md5(item_id.encode())
  92. # hex_dig = hash_object.hexdigest()
  93. # download_dir = os.path.join(TEMP_DIR,"video")
  94. # if not os.path.exists(download_dir):
  95. # os.makedirs(download_dir)
  96. # file_path = os.path.join(download_dir, hex_dig + ".mp4")
  97. # try:
  98. # file_path = await download_video(item_id,file_path)
  99. # if file_path:
  100. # pass
  101. # except Exception as e:
  102. # logger.exception(e)
  103. # if not os.path.exists(file_path):
  104. # os.remove(file_path)
  105. '''
  106. data = {'event': 'item_comment_reply', 'client_key': 'aw6aipmfdtplwtyq', 'from_user_id': '_000LiV_o0FGKMwaZgqGMCvcVSf-UAnNU_kk', 'to_user_id': '_000LiV_o0FGKMwaZgqGMCvcVSf-UAnNU_kk', 'content': '', 'log_id': '021708779405655fdbdfdbdfdbdfdbd0000000000000008cb19c7'}
  107. data_content = json.loads(data.get("content"))
  108. data_content = {"at_user_id":"","avatar":"https://p26.douyinpic.com/aweme/720x720/aweme-avatar/tos-cn-i-0813_a2afe121cfee43c7856b1ec0d6997690.jpeg?from=3782654143","comment_id":"@9VxS1/qCUc80K2etd8wkUc791mbgPP2DPZV2qA6mLFEQaPT960zdRmYqig357zEBoZm7vZ+ZZZz6H3mOVdTOlw==","comment_user_id":"_000LiV_o0FGKMwaZgqGMCvcVSf-UAnNU_kk","content":"测试","create_time":1708779397,"digg_count":0,"level":1,"nick_name":"王孙草爱搞钱","parent_id":"7259290547288870144","reply_comment_total":0,"reply_to_comment_id":"0","reply_to_item_id":"@9VxS1/qCUc80K2etd8wkUc7912DgP/GCPpF2qwKuJ1YTb/X460zdRmYqig357zEBKzkoKYjBMUvl9Bs6h+CwYQ=="}
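Definitions:
- from_user_id: the person who commented under your video.
- to_user_id: your own open_id. You only receive comment events for your own videos after authorizing 思维链.
- Comment: an entry in the list you see when opening a video's comment section, i.e. a comment made directly on the video.
  - Example: open a Douyin video; in the comment section, anything shaped like "avatar nickname \n comment" counts as a comment. Example fields: parent_id, comment_id
  - parent_id: id of the parent comment. If the comment has no parent comment, the video id is used as the parent by default.
- Reply: a comment that replies to a video comment, i.e. the child comments shown under "more replies" of a video comment.
  - Example: in a Douyin comment section, tap "show more"; anything shaped like "avatar nickname > nickname \n comment" counts as a reply.
  - Example fields: reply_to_comment_id, comment_id
  - If it replies to a video comment, reply_to_comment_id = 0
  - If it replies to a child comment, reply_to_comment_id = the comment_id being replied to
  - NOTE: the sample payload in main() carries reply_to_comment_id = "" and item_comment_reply tests for '' rather than 0; confirm which value the callback actually sends.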
  120. '''
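# If the Douyin API allows it, every AI-generated reply should in principle end with the footer: (此消息由@思维链AI助手辅助生成), i.e. "this message was generated with the help of @思维链AI助手".
# This is not just advertising; it also lets viewers tell which replies are written by a human and which by the AI.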
  123. async def item_comment_reply(data):
  124. logger.info(f"new item_comment_reply event, comment_data: {data}")
  125. db_events = EventRepository()
  126. db_comment = CommentRepository()
  127. event_model,comment_model = db_events.save_item_comment_reply(data)
  128. event_model:Events
  129. comment_model:CommentContent
  130. user_model:UserInfo = UserRepo().get_by_open_id(event_model.to_user_id)
  131. # 理论上不太可能 not user_model ,因为用户登录后授权,数据库 User 中就记录了用户信息。
  132. # 登录扫码时 url scopes=item.comment ,则就会收到 item_comment_reply 的回调事件
  133. if not user_model:
  134. logger.info(f"没有授权登录到思维链 {event_model.to_user_id}")
  135. return
  136. '''原则上不能让AI自己回复自己的评论,原因如下
  137. - 如果 AI 回答不完整,想手动回复该条评论,会导致让AI再次回复你自己手动评论的内容
  138. - 如果你回复自己的视频,在评论区阐述自己的观点,AI会回复你这条评论。但这很矛盾,你为什么要自己阐述完又让AI补充?
  139. - 会陷入死循环,自己回复自己。虽然技术上可以做到不让AI回复AI产生的评论,但意义何在?既然你选择手动评论,说明这段对话中已经不需要AI
  140. '''
  141. # 任何一条评论都可能 @某个用户,当 @思维链AI助手 或者 @自己 时,允许回复一次
  142. if comment_model.at_user_id == event_model.to_user_id:
  143. return
  144. if comment_model.comment_user_id == event_model.to_user_id:
  145. return
  146. # 如果是回复他人的视频评论
  147. elif comment_model.reply_to_comment_id == '':
  148. exist_comment:CommentContent = CommentRepository().select(
  149. CommentContent.comment_id == comment_model.parent_id,
  150. CommentContent.comment_user_id != event_model.to_user_id).first()
  151. # 如果存在,说明这条评论自己账号发表的
  152. if exist_comment:
  153. logger.info(f"收到 AI 发表评论的回调 {comment_model} exist_comment_id:{exist_comment.id}")
  154. pass
  155. else:
  156. await chat_to_langchain(event_model, comment_model, user_model)
  157. # 如果是回复他人的回复
  158. elif comment_model.reply_to_comment_id:
  159. # 查询这个回复事件是回复哪一条已有评论。
  160. exist_comment:CommentContent = CommentRepository().select(
  161. CommentContent.comment_id == comment_model.reply_to_comment_id,
  162. CommentContent.comment_user_id == event_model.to_user_id).first()
  163. logger.info(f"reply to exist_comment {exist_comment}")
  164. # 查询到已存在的评论,是回复自己
  165. if exist_comment:
  166. await chat_to_langchain(event_model, comment_model, user_model)
  167. async def main():
  168. data = {'event': 'item_comment_reply', 'client_key': 'aw6aipmfdtplwtyq', 'from_user_id': '_000QadFMhmU1jNCI3JdPnyVDL6XavC70dFy', 'to_user_id': '_000LiV_o0FGKMwaZgqGMCvcVSf-UAnNU_kk', 'content': '{"at_user_id":"","avatar":"https://p11.douyinpic.com/aweme/720x720/aweme-avatar/tos-cn-i-0813_66c4e34ae8834399bbf967c3d3c919db.jpeg?from=3782654143","comment_id":"@9VxS1/qCUc80K2etd8wkUc791mHvNP6BPpF0rg6iK1Qba/L660zdRmYqig357zEBevPVhKm8JTHRrBUxsAwWCQ==","comment_user_id":"_000QadFMhmU1jNCI3JdPnyVDL6XavC70dFy","content":"7","create_time":1710604351,"digg_count":0,"level":1,"nick_name":"程序员马工","parent_id":"@9VxS1/qCUc80K2etd8wkUc7912DgP/GCPpF2qwKuJ1YTb/X460zdRmYqig357zEBKzkoKYjBMUvl9Bs6h+CwYQ==","reply_comment_total":0,"reply_to_comment_id":"","reply_to_item_id":"@9VxS1/qCUc80K2etd8wkUc7912DgP/GCPpF2qwKuJ1YTb/X460zdRmYqig357zEBKzkoKYjBMUvl9Bs6h+CwYQ=="}', 'log_id': '202403162352304940C40D949F65E2B93E'}
  169. await item_comment_reply(data)
  170. if __name__ == "__main__":
  171. asyncio.run(main())