| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193 |
- import asyncio
- import hashlib
- import json
- from typing import List
- import os
- import sys
- sys.path.append(os.path.dirname(os.path.dirname(__file__)))
- from db.comment import CommentRepository
- from db.docs import CategoriesRepository,Categories
- from db.user_oauth import UserOAuthRepository,UserOAuthToken
- from config import logger,TEMP_DIR,MNT_DOUYIN_DATA
- from grpc_m.send_data_to_vector import langchain_chat,simarity_search
- import douyin.comment_reply
- from fastapi import FastAPI,APIRouter, File, HTTPException
- from db.comment import CommentContent,Events,EventRepository
- from db.user_info import UserInfo,UserInfoRepository
- from db.docs import DocumentsRepository,Documents
- from db.user import User,UserRepo,engine
- from db.video_data import VideoData,VideoItemDocRepo
- from grpc_m.client import get_doc_vector,save_doc_vector
- from sqlmodel import Field, SQLModel,Relationship,create_engine,Session,select,func,Column
- from tool.video_get import download_video_by_item_id,get_video_download_urls,download_video
- from tool.exec_asr_client import run_asr_client
- from enum import Enum
- from tool.job_video_asr import video_item_to_vector
-
# Strong references to fire-and-forget tasks. The event loop itself only keeps
# weak references to tasks, so a task nobody else references may be
# garbage-collected before it completes.
_background_tasks = set()


async def get_video_promt(user: User, event_model: Events) -> str:
    """Build the ``<video>`` prompt fragment for the video a comment belongs to.

    Looks up (or creates) the stored record for
    ``event_model.content.reply_to_item_id``. When that record has no document,
    or the document's status is not 100, ``video_item_to_vector`` is scheduled
    as a background task and an empty string is returned. Otherwise the
    document is queried with the comment text through ``get_doc_vector`` and
    the hits are rendered as markup.

    Args:
        user: Owner of the video; passed to ``VideoItemDocRepo.get_or_create``.
        event_model: The stored webhook event; ``content.reply_to_item_id`` and
            ``content.content`` are read from it.

    Returns:
        ``""`` when the video document is not ready yet, otherwise a string of
        the form ``<video relpath="...">[summary][content]<video/>``.
    """
    video_item = VideoItemDocRepo().get_or_create(
        user, event_model.content.reply_to_item_id
    )
    doc = video_item.doc

    # The video has not had its transcript extracted and vectors computed yet
    # (status 100 is the value this code treats as "ready" -- presumably the
    # "done" state of the processing job; confirm against the model).
    if not doc or doc.status != 100:
        task = asyncio.create_task(video_item_to_vector(video_item))
        # Hold the task until it finishes so it cannot be collected mid-run.
        _background_tasks.add(task)
        task.add_done_callback(_background_tasks.discard)
        return ""

    video_summarize = ""
    if doc.summarize:
        video_summarize = f"<summarize>{doc.summarize}<summarize/>"

    doc_query_res = await get_doc_vector(doc.id, event_model.content.content)
    # Only score and chunk text are rendered; the metadata/uuid of each hit
    # were read into unused locals before and are no longer touched.
    chunks = "".join(
        f'<chunks score="{item.score}">{item.chunk}<chunks/>'
        for item in doc_query_res
    )
    content = f'<content>{chunks}<content/>' if chunks else ""

    video_promt = f"""<video relpath="{doc.path}">{video_summarize}{content}<video/>"""
    logger.info(f"video_promt: {video_promt}")
    return video_promt
def gen_prompt(nickname, content_models: List[CommentContent], video_promt):
    """Assemble the full LLM prompt for answering the last comment in a thread.

    The prompt consists of a role description (which embeds ``nickname``), the
    conversation rendered one ``nick_name: content`` line per comment inside
    ``<chat>``, and the video context inside ``<docs>``.

    Previously ``video_promt`` was ignored and the ``<docs>`` section always
    contained the literal word ``docs``; the retrieved video context is now
    actually inserted.

    Args:
        nickname: Display name of the account the assistant answers for.
        content_models: Comments of the thread, in the order they should
            appear; each must expose ``nick_name`` and ``content``.
        video_promt: Video context markup (may be empty or ``None``).

    Returns:
        The formatted prompt string.
    """
    # NOTE(review): the role text refers to a <vedio_content> tag, while the
    # video context is emitted under <docs>. The wording is left untouched
    # here; confirm which tag name the prompt should use.
    role = """
你是一个抖音视频的创作者的客服助理,你的昵称是'{nick_name}'。你发布了一个视频,<vedio_content> 是视频的部分文案片段。
<chat> 是用户在视频评论、或用户各自的讨论。你需要回答最后一条 chat 信息。""".format(nick_name=nickname)
    logger.info(f"{role}")
    prompt = """
{role}
<chat>
{chat}
<chat/>
<docs>
{docs}
<docs/>
"""
    chat_record = "".join(
        f"{model.nick_name}: {model.content}\n" for model in content_models
    )
    # The unused ``vedio_content`` keyword was dropped: the template has no
    # such placeholder, so it never had any effect.
    return prompt.format(chat=chat_record, role=role, docs=video_promt or "")
async def chat_to_langchain(event_model: Events, comment_model: CommentContent, user_model: User):
    """Generate a reply for ``comment_model`` and post it as a comment reply.

    Loads the comment thread, builds the prompt from the thread and the video
    context, and posts the answer through
    ``douyin.comment_reply.reply_to_comment`` using the credentials in
    ``user_model.oauth``. The outcome is only logged; nothing is returned.

    Args:
        event_model: The stored webhook event the comment came from.
        comment_model: The comment to answer.
        user_model: The account the reply is posted for; ``oauth`` and
            ``info.nickname`` are read from it.
    """
    oauth_model: UserOAuthToken = user_model.oauth
    # Look up the conversation this comment belongs to (the original note says
    # the lookup is recursive).
    comment_replies = CommentRepository().get_comment_and_replies(comment_model.comment_id)
    if not comment_replies:
        # Nothing to answer; indexing [-1] below would raise IndexError.
        logger.error(f"no comment thread found for comment_id: {comment_model.comment_id}")
        return
    query = comment_replies[-1].content
    video_promt = await get_video_promt(user_model, event_model)
    prompt = gen_prompt(user_model.info.nickname, comment_replies, video_promt)
    logger.info(f"query:{query} prompt: {prompt}")
    # TODO: the real model call is disabled and a fixed sample reply is used:
    # langchain_res = await langchain_chat(str("4ff71182-5c43-497f-ba16-5b3ba252e478"), prompt)
    langchain_res = "这是一个示例回复"
    if not langchain_res:
        logger.error(f"langchain_chat {langchain_res} ")
        return
    response = await douyin.comment_reply.reply_to_comment(
        oauth_model.open_id,
        oauth_model.access_token,
        content=langchain_res,
        comment_id=comment_model.comment_id,
        item_id=comment_model.reply_to_item_id,
    )
    # A response without a ``data`` object is treated as a failure instead of
    # raising AttributeError on ``None.get``.
    response_data = response.get('data') if response is not None else None
    if response_data is not None and not response_data.get('error_code'):
        logger.info(f"回复评论成功: {langchain_res}")
    else:
        # Usually an expired token or invalid parameters. The original note
        # also mentions a special case where two accounts have both authorized
        # the app and the event's to_user_id belongs to a different
        # authorization.
        logger.error(f"回复评论失败: {response}")
- # async def save_video_item(open_id, item_id):
- # hash_object = hashlib.md5(item_id.encode())
- # hex_dig = hash_object.hexdigest()
- # download_dir = os.path.join(TEMP_DIR,"video")
- # if not os.path.exists(download_dir):
- # os.makedirs(download_dir)
- # file_path = os.path.join(download_dir, hex_dig + ".mp4")
- # try:
- # file_path = await download_video(item_id,file_path)
- # if file_path:
- # pass
- # except Exception as e:
- # logger.exception(e)
- # if not os.path.exists(file_path):
- # os.remove(file_path)
- '''
- data = {'event': 'item_comment_reply', 'client_key': 'aw6aipmfdtplwtyq', 'from_user_id': '_000LiV_o0FGKMwaZgqGMCvcVSf-UAnNU_kk', 'to_user_id': '_000LiV_o0FGKMwaZgqGMCvcVSf-UAnNU_kk', 'content': '', 'log_id': '021708779405655fdbdfdbdfdbdfdbd0000000000000008cb19c7'}
- data_content = json.loads(data.get("content"))
- data_content = {"at_user_id":"","avatar":"https://p26.douyinpic.com/aweme/720x720/aweme-avatar/tos-cn-i-0813_a2afe121cfee43c7856b1ec0d6997690.jpeg?from=3782654143","comment_id":"@9VxS1/qCUc80K2etd8wkUc791mbgPP2DPZV2qA6mLFEQaPT960zdRmYqig357zEBoZm7vZ+ZZZz6H3mOVdTOlw==","comment_user_id":"_000LiV_o0FGKMwaZgqGMCvcVSf-UAnNU_kk","content":"测试","create_time":1708779397,"digg_count":0,"level":1,"nick_name":"王孙草爱搞钱","parent_id":"7259290547288870144","reply_comment_total":0,"reply_to_comment_id":"0","reply_to_item_id":"@9VxS1/qCUc80K2etd8wkUc7912DgP/GCPpF2qwKuJ1YTb/X460zdRmYqig357zEBKzkoKYjBMUvl9Bs6h+CwYQ=="}
- 定义:
- - from_user_id: 在你视频下评论的人
- - to_user_id: 你自己的 open_id ,只有授权了思维链,才会收到自己视频的评论事件
- - 评论: 点开视频评论区就能看到的评论列表,或者在视频评论区中被人评论。
- - 例如:打开抖音视频,在评论区中,任何类似 “头像 昵称 \n 评论” 的内容,都可以看做一条评论。示例值: parent_id 、 comment_id
- - parent_id 父级评论id ,如果该评论没有父级评论,默认使用视频id作为父级
- - 回复评论: 回复“视频评论”的评论,也就是点开视频评论中“更多回复”中展示的各种子评论。
- - 例如:在抖音视频评论区中,点击“展开更多” ,看到类似 “头像 昵称 > 昵称 \n 评论” 这样的内容,都可以看做是回复
- - 示例值: reply_to_comment_id 、 comment_id
- - 如果回复了视频评论, reply_to_comment_id = 0
- - 如果回复了子评论, reply_to_comment_id = 被回复的 comment_id
- '''
- # 如果抖音API支持的话,理论上每一条 AI 回复的评论都会添加尾部注释: (此消息由@思维链AI助手辅助生成)。
- # 这不仅仅是打广告,也是为了观众区分哪一条是人工回复,哪一条是 AI 回复。
async def item_comment_reply(data):
    """Handle one ``item_comment_reply`` webhook event.

    The event and its comment are stored first. A reply is then generated via
    ``chat_to_langchain`` only when all of the following hold:

    * the video owner (``to_user_id``) is a known user;
    * the comment does not @-mention the owner;
    * the comment was not written by the owner;
    * either the comment has no target comment and no stored parent comment
      written by someone else, or it targets a stored comment written by the
      owner.

    Args:
        data: The webhook payload; it is passed unchanged to
            ``EventRepository.save_item_comment_reply``.
    """
    logger.info(f"new item_comment_reply event, comment_data: {data}")
    db_events = EventRepository()
    db_comment = CommentRepository()
    event_model, comment_model = db_events.save_item_comment_reply(data)
    event_model: Events
    comment_model: CommentContent
    # Annotated as User (was UserInfo): the value is handed to
    # chat_to_langchain, which declares User and reads .oauth / .info from it.
    user_model: User = UserRepo().get_by_open_id(event_model.to_user_id)
    # A missing user should be rare: logging in and authorizing records the
    # user, and callbacks are only delivered when the login URL requested
    # scopes=item.comment.
    if not user_model:
        logger.info(f"没有授权登录到思维链 {event_model.to_user_id}")
        return

    owner_id = event_model.to_user_id

    # The AI must not answer the owner's own comments:
    # - a manual follow-up to an incomplete AI answer would be answered again;
    # - the owner's own statements under the video would get an AI reply;
    # - the AI could end up replying to itself in a loop.
    #
    # NOTE(review): the original comment says a comment that @-mentions the
    # assistant or the owner should be answered once, but the code skips such
    # comments entirely -- confirm the intended behaviour.
    if comment_model.at_user_id == owner_id:
        return
    if comment_model.comment_user_id == owner_id:
        return

    reply_to_comment_id = comment_model.reply_to_comment_id
    # No target sub-comment. The platform has been observed to send both ""
    # and "0" here; previously "0" fell into the branch below and such
    # comments were never answered.
    if reply_to_comment_id in ('', '0'):
        exist_comment: CommentContent = db_comment.select(
            CommentContent.comment_id == comment_model.parent_id,
            CommentContent.comment_user_id != owner_id).first()
        # NOTE(review): the original comment and the log message describe a
        # hit as "this comment was posted by our own account", although the
        # filter selects a parent comment written by someone else.
        if exist_comment:
            logger.info(f"收到 AI 发表评论的回调 {comment_model} exist_comment_id:{exist_comment.id}")
        else:
            await chat_to_langchain(event_model, comment_model, user_model)
    # The comment replies to another reply.
    elif reply_to_comment_id:
        # Find the stored comment this event replies to, restricted to
        # comments written by the owner.
        exist_comment: CommentContent = db_comment.select(
            CommentContent.comment_id == reply_to_comment_id,
            CommentContent.comment_user_id == owner_id).first()
        logger.info(f"reply to exist_comment {exist_comment}")
        # A hit means the commenter is replying to the owner.
        if exist_comment:
            await chat_to_langchain(event_model, comment_model, user_model)
async def main():
    """Replay one captured ``item_comment_reply`` payload through the handler."""
    # JSON-encoded comment body exactly as delivered in the event's "content".
    comment_json = '{"at_user_id":"","avatar":"https://p11.douyinpic.com/aweme/720x720/aweme-avatar/tos-cn-i-0813_66c4e34ae8834399bbf967c3d3c919db.jpeg?from=3782654143","comment_id":"@9VxS1/qCUc80K2etd8wkUc791mHvNP6BPpF0rg6iK1Qba/L660zdRmYqig357zEBevPVhKm8JTHRrBUxsAwWCQ==","comment_user_id":"_000QadFMhmU1jNCI3JdPnyVDL6XavC70dFy","content":"7","create_time":1710604351,"digg_count":0,"level":1,"nick_name":"程序员马工","parent_id":"@9VxS1/qCUc80K2etd8wkUc7912DgP/GCPpF2qwKuJ1YTb/X460zdRmYqig357zEBKzkoKYjBMUvl9Bs6h+CwYQ==","reply_comment_total":0,"reply_to_comment_id":"","reply_to_item_id":"@9VxS1/qCUc80K2etd8wkUc7912DgP/GCPpF2qwKuJ1YTb/X460zdRmYqig357zEBKzkoKYjBMUvl9Bs6h+CwYQ=="}'
    sample_event = {
        'event': 'item_comment_reply',
        'client_key': 'aw6aipmfdtplwtyq',
        'from_user_id': '_000QadFMhmU1jNCI3JdPnyVDL6XavC70dFy',
        'to_user_id': '_000LiV_o0FGKMwaZgqGMCvcVSf-UAnNU_kk',
        'content': comment_json,
        'log_id': '202403162352304940C40D949F65E2B93E',
    }
    await item_comment_reply(sample_event)


if __name__ == "__main__":
    # Run the replay when the module is executed as a script.
    asyncio.run(main())
|