Forráskód Böngészése

prefect方式完成私信回复

mrh 1 éve
szülő
commit
a19b1c1a3d
13 módosított fájl, 907 hozzáadás és 90 törlés
  1. 16 7
      conf/config.py
  2. 44 0
      database/config.py
  3. 86 0
      database/s3.py
  4. 1 1
      douyin/api/req_api.py
  5. 48 0
      douyin/base.py
  6. 265 0
      douyin/chat.py
  7. 143 0
      douyin/chat_flow.py
  8. 56 0
      douyin/chat_test.py
  9. 207 0
      douyin/conversation.py
  10. 0 0
      douyin/cookies.py
  11. 40 0
      douyin/user.py
  12. 0 81
      dp/chat.py
  13. 1 1
      dp/page.py

+ 16 - 7
conf/config.py

@@ -1,19 +1,20 @@
 import sys
 import os
+import pathlib
 from DrissionPage import ChromiumOptions
 import dataset
 from loguru import logger
 HOST='localhost'
 PORT=9226
 
-WORK_DIR = os.path.dirname(os.path.dirname(__file__))
-CONF_DIR =  os.path.join(WORK_DIR, 'conf')
-OUTPUT = os.path.join(WORK_DIR, 'output')
-PAGE_OUTPUT = os.path.join(OUTPUT, 'page')
+WORK_DIR = pathlib.Path(__file__).parent.parent
+CONF_DIR =  WORK_DIR/'conf'
+OUTPUT =  WORK_DIR/'output'
+PAGE_OUTPUT =  OUTPUT/'page'
 if len(sys.argv)>=3 and sys.argv[1] == "-c":
     INI_PATH = sys.argv[2]
 else:
-    INI_PATH = os.path.join(CONF_DIR, 'dp_configs.ini')
+    INI_PATH =  CONF_DIR/'dp_configs.ini'
 
 if not os.path.exists(OUTPUT):
     os.mkdir(OUTPUT)
@@ -41,7 +42,7 @@ def find_edge_path_in_registry():
     return path  
 
 logger.debug(f"find_edge browser path: {find_edge_path_in_registry()}")
-USER_DATA = os.path.join(OUTPUT, "userData_ai-yunying")
+USER_DATA =  OUTPUT/'userData_ai-yunying'
 # BROWSER_PATH 值理论无需手动设置,因为该模块会自动在默认路径查找 Chrome ,因此下列判断可以删去
 if 'win' in sys.platform:
     if not os.path.exists(INI_PATH):
@@ -59,4 +60,12 @@ if 'win' in sys.platform:
         logger.debug(f"load init {INI_PATH}")
         logger.debug(f"chrome_options.browser_path {chrome_options.browser_path}")
 elif sys.platform == 'linux':
-    chrome_options = ChromiumOptions(ini_path=INI_PATH)
+    chrome_options = ChromiumOptions(ini_path=INI_PATH)
+
+# import consul_srv_client
+# ip,port = consul_srv_client.get_srv("prefect.service.consul")
+prefect_api = f"http://{'10.0.0.1'}:{'8612'}/api"
+# 在导入 prefect 之前设置环境变量
+os.environ["PREFECT_API_URL"] = prefect_api
+
+G4F_API = "http://pc:8003/"

+ 44 - 0
database/config.py

@@ -0,0 +1,44 @@
+import datetime
+from dataset import Table
+import dataset
+from minio import Minio
+import os
+prefect_api = f"http://10.0.0.1:8612/api"
+# 在导入 prefect 之前设置环境变量
+os.environ["PREFECT_API_URL"] = prefect_api
+
+'''
+读写
+F3i2GpD5VmpCIQjPuq2m
+5D5V8Ue0DtjkzBFYStBO4gyqeTLYcfvR28EYFe0q
+只读
+hudyqo02Y1Asm6jjSYaj
+Dcm6gw4QqKLZRi0Awy1nVjwKsCKBn3WB9lWJpiG0
+'''
+
+os.environ["s3_access_key"] = 'F3i2GpD5VmpCIQjPuq2m' 
+os.environ["s3_secret_key"] = "5D5V8Ue0DtjkzBFYStBO4gyqeTLYcfvR28EYFe0q"
+os.environ["s3_endpoint"] = "sv-v:9002"
+from prefect.filesystems import LocalFileSystem, RemoteFileSystem
+from prefect.blocks.core import Block
+minio_block = RemoteFileSystem(
+    basepath="s3://swl/prefect/",
+    settings={
+    "key": os.environ["s3_access_key"],
+    "secret": os.environ["s3_secret_key"],
+    "client_kwargs": {"endpoint_url": 'http://' + os.environ["s3_endpoint"]} ,
+},)
+# minio_block.save("minio", overwrite=True)
+minio_block:RemoteFileSystem = minio_block.load("minio")
+
+
+# s3client = Minio(os.environ["s3_endpoint"] ,
+#     access_key=os.environ["s3_access_key"],
+#     secret_key=os.environ["s3_secret_key"],
+#     secure=False
+# )
+ai_yunying_db = dataset.connect(f'postgresql://pg:pg@sv-v:5432/ai_yunying')
+# content_type header json
+# print(s3client.fput_object(content_type="application/json", bucket_name='ai-yunying'))
+# print(db.tables)
+

+ 86 - 0
database/s3.py

@@ -0,0 +1,86 @@
+import os
+import sys
+sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+
+from io import BytesIO
+import json
+import mimetypes
+import config
+import datetime
+from conf.config import logger
+import pickle  
+
+class S3:
+    # s3client is minio python SDK client
+    def __init__(self, bucket='swl', client=config.s3client) -> None:
+        self.bucket = bucket
+        self.minio_client = client
+    # 根据本地时间自动获取对象存储前缀,如 log/2024-04-25/xxx
+    def get_object_prefix(self, dir='log'):
+        # 获取时分秒,并且符合路径的格式
+        return dir + '/' + str(datetime.date.today()) 
+    
+    def get_object_name_by_time(self):
+        # 获取时分秒毫秒,并且符合路径的格式,如 023213_123.json
+        now = datetime.datetime.now()
+        # 格式化时间:小时、分钟、秒、毫秒
+        formatted_time = now.strftime("%H%M%S_%f")  # %f 提供了微秒,所以我们取前三个数字作为毫秒
+        return formatted_time
+
+    # def put_dict(self, obj, name_by_time=True):
+
+
+    def put_json(self, obj, object_name='',name_by_time=True):
+        # object_name 如果为空,则获取 时分秒毫秒,并且符合路径的格式
+        if name_by_time:
+            object_name = self.get_object_name_by_time() + object_name
+
+        object_name = self.get_object_prefix() + '/' + object_name
+        if isinstance(obj, dict):
+            obj = json.dumps(obj).encode()
+        else:
+            obj = obj.encode()
+        data_stream = BytesIO(obj)
+        # 使用 put_object 上传数据  
+        self.minio_client.put_object(  
+            bucket_name=self.bucket,   
+            data=data_stream,   
+            object_name=object_name,   
+            content_type='application/json',   
+            length=data_stream.getvalue().__len__()  # 指定上传文件大小,否则会报错:i:\code\ai-yunying
+        )  
+        data_stream.close()
+        logger.info(f'put object {object_name} to s3 success')
+        return object_name
+    
+    def fput(self, file_path, object_name='', content_type=''):
+        # 根据文件名自动获取 object_name
+        if object_name == '':
+            object_name = file_path.split('/')[-1]
+        # 根据文件后缀自动识别 content_type
+        if content_type == '':
+            content_type = mimetypes.guess_type(file_path)[0] or 'application/octet-stream'
+        object_name = self.get_object_prefix() + '/' + object_name
+        # 使用 put_object 上传数据  
+        self.minio_client.fput_object(  
+            bucket_name=self.bucket,   
+            file_path=file_path,   
+            object_name=object_name,   
+            content_type=content_type  
+        ) 
+    
+    def get_object(self, object_name):
+        res = self.minio_client.get_object(bucket_name=self.bucket, object_name=object_name)
+        return res.json()
+
+def main():
+    s3 = S3()
+    test = {"dat": "test", 'ok':[1,2,3,3]}
+    # s3.fput(r'I:\code\ai-yunying\live-online-people\output\data.json', 'log/20240425/test2.json')
+    path = s3.put_json(obj=test, object_name='test.json', name_by_time=False)
+    logger.info(f"{path}")
+    res = s3.get_object(object_name=path)
+    logger.info(f"{res}")
+
+if __name__ == "__main__":
+    main()

+ 1 - 1
dp/req_api.py → douyin/api/req_api.py

@@ -1,7 +1,7 @@
 import requests
 from requests import Request,Response
 import DrissionPage
-import cookies
+import douyin.cookies as cookies
 from conf.config import logger
 import jsonpath
 

+ 48 - 0
douyin/base.py

@@ -0,0 +1,48 @@
+import base64
+import datetime
+import json
+import pickle
+import os
+import sys
+sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+from conf.config import OUTPUT,PAGE_OUTPUT
+from database.config import minio_block
+import prefect.runtime
+from pyunit_time import Time
+from dp.page import page
+
+import time  
+from datetime import datetime  
+
+tab=page.get_tab(url='https://www.douyin.com/user/self')
+
+
+def get_object_name_by_time():
+    # Build a key template from the current local time, e.g. "20240425/023213_123456-{task_run.task_name}".
+    now = datetime.now()
+    # Date directory first, then hour/minute/second and microseconds.
+    formatted_time = now.strftime("%Y%m%d/%H%M%S_%f")  # %f is the full 6-digit microsecond field; nothing is truncated to milliseconds
+    return formatted_time + '-{task_run.task_name}'
+
+
+def get_result(path:str):
+    if path.startswith(minio_block.basepath):
+        path = path[len(minio_block.basepath):]
+    bytes = minio_block.read_path(path)
+    json_data = json.loads(bytes)
+    base64_data = json_data['data']
+    decoded_data = base64.b64decode(base64_data)
+    result = pickle.loads(decoded_data)
+    return result
+  
+def save_page_info(local=False, file_name=''):
+    if not file_name:
+        file_name = datetime.now().strftime("%Y%m%d-%H%M%S_%f")
+    if local:
+        f = open(PAGE_OUTPUT/f'tab-{file_name}.html', 'w')
+        f.write(tab.html)
+
+    # tab.html
+    minio_block.write_path(f'{file_name}.html',tab.html.encode() )
+    img_bytes = tab.get_screenshot(as_bytes=True)
+    minio_block.write_path(f'{file_name}.png',img_bytes )

+ 265 - 0
douyin/chat.py

@@ -0,0 +1,265 @@
+import os
+import re
+import time
+import page
+import chat_test
+from conf.config import logger,OUTPUT
+from database.config import ai_yunying_db
+from dataset import Table
+from DrissionPage import ChromiumPage
+from DrissionPage._elements.chromium_element import ChromiumElement
+from DrissionPage._units.listener import DataPacket
+import jsonpath
+from prefect import flow,task
+import chat_flow
+
+class User:
+    def __init__(self, db=ai_yunying_db) -> None:
+        self.db = db
+        self.table_user:Table = self.db.get_table("user")
+
+    # 解析来自这个包请求返回的信息: https://www.douyin.com/aweme/v1/web/user/profile/other/
+    # 返回的包内容如: .\testm\user_profile_response.py
+    def filter_from_user_profile(self, body:dict):
+        user = body['user']
+        save_data = {
+            "uid": user.get('uid'),
+            "nickname": user.get('nickname'),
+            "avatar_medium": user.get("avatar_medium"),
+            "sec_uid": user.get('sec_uid'),
+            "signature": user.get('signature'),
+            "city": user.get('city'),
+            "ip_location": user.get('ip_location'),
+            "province": user.get('province'),
+            "school_name" : user.get('school_name'),
+            "follow_status": user.get('follow_status'),
+            "follower_count" : user.get('follower_count'),
+            "total_favorited" : user.get('total_favorited'),
+            "aweme_count": user.get('aweme_count'),
+        }
+        return save_data
+    def save_user_profile_to_db(self, body:dict):
+        save_data = self.filter_from_user_profile(body)
+        id = self.table_user.insert_ignore(save_data, keys=["uid"])
+        logger.info(f"插入/存在则忽略用户 id {id}")
+        if id:
+            return self.table_user.find_one(id=id)
+        else:
+            return self.table_user.find_one(uid=save_data['uid'])
+
+class Chat:
+    homepage = 'https://www.douyin.com/user/self'
+    def __init__(self, db=ai_yunying_db) -> None:
+        self.tab_home = page.page.tab
+        # logger.info(f"{self.tab_home.url}") 
+        self.check_home_page()
+        self.user = User(db)
+        self.mg_test = chat_test.Chat()
+        self.mg_test.send_msg("你好")
+
+    def check_has_im_msg(self):
+        self.check_home_page()
+        self.tab_home.scroll.to_top()
+        ele_im = self.tab_home.ele('xpath://div[@data-e2e="im-entry"]')
+        ele_im_red_pot = ele_im._find_elements('xpath://div[@class="LFWqFfyH isLight"]', raise_err=False)
+        # 存在私信小红点
+        ele_has_msg = ele_im_red_pot._find_elements('xpath:/div',raise_err=False)
+        return ele_im, ele_has_msg
+    def run(self):
+        stranger_msg_count = 0
+        ele_im, ele_has_msg = self.check_has_im_msg()
+        chat_flow.response_im(tab=self.tab_home, ele_im=ele_im)
+        return
+        while True:
+            ele_im, ele_has_msg = self.check_has_im_msg()
+            if ele_has_msg:
+                logger.info(f"有未读消息 {ele_has_msg.text}")
+                chat_flow.response_im(tab=self.tab_home, ele_im=ele_im)
+            stranger_msg_count +=1
+            if stranger_msg_count > 10:
+                stranger_msg_count = 0
+                ele_im.click()
+                ele_stranger = ele_im._find_elements("陌生人消息", raise_err=False)
+                if ele_stranger:
+                    logger.info(f"有陌生人消息")
+                    self.response_stranger()
+            time.sleep(1)
+    
+    def parse_conversations_content_text(self, html):  
+        # 正则表达式匹配 <pre> 标签中的文本和 <img> 标签中的 alt 属性值  
+        pre_pattern = re.compile(r'<pre>(.*?)</pre>', re.DOTALL)  
+        img_alt_pattern = re.compile(r'<img[^>]+alt="\[(.*?)\]"', re.DOTALL)  
+        
+        # 查找所有 <pre> 标签中的文本  
+        pres = pre_pattern.findall(html)  
+        
+        # 查找所有 <img> 标签中的 alt 属性值  
+        alts = img_alt_pattern.findall(html)  
+        
+        # 结合这两部分来构建最终的字符串  
+        result_items = []  
+        pre_index, alt_index = 0, 0  
+        while pre_index < len(pres) or alt_index < len(alts):  
+            if pre_index < len(pres):  
+                result_items.append(pres[pre_index])  
+                pre_index += 1  
+            if alt_index < len(alts):  
+                result_items.append('[' + alts[alt_index] + ']')  
+                alt_index += 1  
+                
+        # 返回连接后的字符串  
+        return ''.join(result_items)  
+    def analyze_conversations_content(self,ele_content:ChromiumElement, conversation_detail:DataPacket):
+        ret = {
+            "is_me": None,
+            "content": {
+                "type": None,
+                "data": None,
+            },
+        }
+        ret['is_me'] = True if 'tIJOLE11' in ele_content.attr("class") else False
+        text = ele_content._find_elements('xpath://span[@class="WCSQFekt"]', raise_err=False)
+        if text:
+            text = self.parse_conversations_content_text(text.text)
+            ret['content']['data'] = text
+            ret['content']['type'] = "text"
+        else:
+            imgs = ele_content._find_elements('xpath://img', raise_err=False)
+            # 视频通常会包含封面和播放按钮,所以这里会有两个 img 元素
+            if imgs and isinstance(imgs, list):
+                ret['content']['type'] = "video"
+                if conversation_detail:
+                    data = conversation_detail.response.body
+                    ret['content']['data'] = data
+            else:
+                img = ele_content._find_elements('xpath://div[@class="UDDVxYoC"]', raise_err=False)
+                if img:
+                    ret['content'] = img.attr("src")
+                    ret['content_type'] = "image"
+        return ret
+        # logger.info(f"ele_content.html {ele_content.html}")
+        # if ret['is_me']:
+        #     logger.info(f"me {ele_content.s_ele('xpath://span').text}")
+        # else:
+        #     logger.info(f"user {ele_content.s_ele('xpath://span').text}")
+
+    def get_all_conversations(self, ele_popShadowAnimation:ChromiumElement, conversation_detail:DataPacket):
+        # logger.info(f"{ele_popShadowAnimation.html}")
+        # 查找聊天框内所有的消息 class="A1KpIsbL HO4aqgd4"
+        eles_msg = ele_popShadowAnimation.s_eles('xpath://div[@class="A1KpIsbL HO4aqgd4"]')
+        i = 0
+        for ele in eles_msg:
+            logger.info(f"ele.html {ele.html}")
+            # 是否存在时间 <div class="kZAHYArp">18:07 </div>
+            is_ele_time = ele._find_elements('xpath://div[@class="kZAHYArp"]', raise_err=False)
+            ele_content:ChromiumElement = ele._find_elements('xpath://div[contains(@class, "SZtuWb3S")]', raise_err=False)
+            res = self.analyze_conversations_content(ele_content, conversation_detail)
+            res.update({'time': is_ele_time.text if is_ele_time else None})
+            logger.info(f"res {res}")
+
+    def enter_conversation(self, conversation_item:ChromiumElement):
+        # 找到聊天会话框
+        # class="qbjZBApl popShadowAnimation"
+        ele_popShadowAnimation = self.tab_home.ele('xpath://div[@class="qbjZBApl popShadowAnimation"]')
+        user_info = None
+        try:
+            user_profile_packet = None
+            conversation_detail = None
+            # 获取该用户的基本信息
+            for packet in self.tab_home.listen.steps(2, timeout=3):
+                if "user/profile" in packet.url:
+                    user_profile_packet = packet
+                if "aweme/detail" in packet.url:
+                    conversation_detail = packet
+                if user_profile_packet and conversation_detail:
+                    break
+            logger.info(f"user_profile_packet {user_profile_packet}")
+            logger.info(f"conversation_detail {conversation_detail}")
+            user_info = self.user.save_user_profile_to_db(user_profile_packet.response.body)
+        except Exception as e:
+            logger.exception(f"获取用户信息失败 {e} conversation_item {conversation_item}")
+            self.tab_home.listen.stop()
+            ele_popShadowAnimation.ele('退出会话').click()
+            return
+        return ele_popShadowAnimation,user_profile_packet,conversation_detail
+    
+
+    # ele_im 顶部私信图标元素
+    def response_frined(self, ele_im:ChromiumElement):
+        logger.info(f"回复好友")
+        ele_im.click()
+        self.tab_home.listen.start("www.douyin.com/aweme/v1/web")
+        # 找到私信弹出窗口
+        # data-e2e="listDlg-container"
+        ele_list_dlg = self.tab_home.ele('xpath://div[@data-e2e="listDlg-container"]')
+        logger.info(f"ele_list_dlg {ele_list_dlg}")
+        
+        # logger.info(f"ele_list_dlg.html {ele_list_dlg.html}")
+        # "conversation-item"
+        conversation_items = ele_list_dlg.eles('xpath://div[@data-e2e="conversation-item"]')
+        logger.info(f"conversation_items {conversation_items}")
+        name = ""
+        ele_msg_red_pot = None
+        conversation_item = None
+        for item in conversation_items:
+            conversation_item = item
+            logger.info(f"{item.html}")
+            # 找到未读消息小红点
+            # class="hcPUqxqn"
+            ele_msg_red_pot = item._find_elements('xpath://div[@class="hcPUqxqn"]', raise_err=False)
+            if ele_msg_red_pot:
+                name = item.ele('xpath://div[@class="gZdlhsqq"]').text
+                break
+        if not ele_msg_red_pot:
+            logger.error(f"没有找到元素 ele_msg_red_pot")
+            return
+        
+        # 点击未读消息小红点
+        ele_msg_red_pot.click()
+        
+        ele_popShadowAnimation,user_profile_packet,conversation_detail = self.enter_conversation(conversation_item)
+        self.get_all_conversations(ele_popShadowAnimation, conversation_detail)
+        return
+        # 找到输入框
+        # data-e2e="msg-input"
+        ele_input = ele_popShadowAnimation.ele('xpath://div[@data-e2e="msg-input"]')
+        ele_input.click()
+        ele_input.input("hello")
+
+        # 找到发送按钮
+        # span class="e2e-send-msg-btn"
+        ele_send = ele_popShadowAnimation.ele('xpath://span[contains(@class, "e2e-send-msg-btn")]')
+        ele_send.click()
+        ele_popShadowAnimation.ele('退出会话').click()
+
+        logger.info(f"回复成功")
+
+    def response_stranger(self):
+        logger.info(f"回复陌生人")
+        with open(os.path.join(OUTPUT, 'page', time.strftime("%Y-%m-%d_%H-%M-%S", time.localtime(time.time())), 'w')) as f:
+            f.write(self.tab_home.html)
+
+
+
+
+    def check_home_page(self):
+        if "/user/self" not in self.tab_home.url:
+            self.tab_home.get(self.homepage)
+
+
+def main():
+    self = Chat()
+    self.run()
+    # ele_im, ele_has_msg = self.check_has_im_msg()
+    # ele_im.click()
+    # self.tab_home.listen.clear()
+    # self.tab_home.listen.start("www.douyin.com", method="GET")
+    # ele_popShadowAnimation = self.tab_home.ele('xpath://div[@class="qbjZBApl popShadowAnimation"]')
+    # ele_im = self.tab_home.ele('xpath://div[@data-e2e="im-entry"]')
+    # logger.info(f"{ele_im.html}")
+    # # <div class="LFWqFfyH isLight">
+    # has_msg_ele = ele_im.ele('xpath://div[@class="LFWqFfyH isLight"]')
+    # logger.info(f"{has_msg_ele.html}")
+
+if __name__ == "__main__":
+    main()

+ 143 - 0
douyin/chat_flow.py

@@ -0,0 +1,143 @@
+import datetime
+import os
+import re
+import time
+import os
+import sys
+sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+
+from dp.page import page
+from douyin import chat_test,base,conversation,user
+from conf.config import logger,OUTPUT
+from database.config import ai_yunying_db,minio_block
+from dataset import Table
+from DrissionPage import ChromiumPage
+from DrissionPage._elements.chromium_element import ChromiumElement
+from DrissionPage._units.listener import DataPacket
+import jsonpath
+from prefect import flow,task
+from prefect.tasks import Task,TaskRun
+from prefect.flows import Flow
+from prefect.states import State,StateType
+
+tab=page.tab
+
+
+
+# 点击私信图标后,获取未读消息小圆点
+@task
+def get_ele_msg_red_pot(ele_im:ChromiumElement):
+    ele_im.click()
+    logger.info(f"点击私信图标后,获取未读消息小圆点")
+    ele_list_dlg = tab.ele('xpath://div[@data-e2e="listDlg-container"]')
+    ele_msg_red_pot = ele_list_dlg.ele('xpath://div[@class="hcPUqxqn"]')
+    return ele_list_dlg,ele_msg_red_pot
+
+@task(persist_result=True, result_storage=minio_block, result_storage_key=base.get_object_name_by_time())
+def get_im_unread_item_user_data(ele_msg_red_pot:ChromiumElement):
+    logger.info(f"存在未读消息,获取该的用户头像、昵称、时间等信息")
+    # 构建所需的数据字典  
+    data = {  
+        "name": '',  
+        "avator": '',  
+        "msg": None,
+        "unread_msg_count":None,
+        "time": ''
+    }
+
+    data["unread_msg_count"] = int(ele_msg_red_pot.text)
+    # 定位私信聊天框,一个用户私信聊天框的完整元素
+    msg_item_div = ele_msg_red_pot.parent() 
+    name = msg_item_div.s_ele("xpath://div[@class='gZdlhsqq']")
+    if name:
+        data["name"] = name.text
+    # 定位头像  
+    avatar_ele = msg_item_div.s_ele("xpath://img")
+    if avatar_ele:
+        data["avator"] = avatar_ele.attr('src')  
+    
+    msg_content = msg_item_div.s_ele('xpath://pre[@class="MnyOYvbN"]')
+    if msg_content:
+        data["msg"] = msg_content.text  
+
+    # 定位时间  
+    ele_time = msg_item_div.s_ele('xpath://div[@class="skNuRdW_"]')
+    if ele_time:
+        data["time"] = ele_time.text  
+
+    table:Table = ai_yunying_db['chat_task']
+    data['is_done'] = False
+    data['create_time'] = datetime.datetime.now()
+    id = table.insert(data)
+    unread_user_data = table.find_one(id=id)
+    logger.info(f"{unread_user_data}")
+    return unread_user_data
+
+
+@task
+def click_im_icon(ele_im:ChromiumElement):
+    return ele_im.click()
+        
+@flow
+def response_im(tab:ChromiumPage, ele_im:ChromiumElement):
+    click_im_icon(ele_im)
+    ele_msg_red_pot = get_ele_msg_red_pot()
+    data,msg_item_div = get_im_unread_item_data(ele_msg_red_pot)
+
+@task
+def check_home_page(home_url:str='https://www.douyin.com/user/self'):
+    if "/user/self" not in tab.url:
+        tab.get(home_url)
+    tab.scroll.to_top()
+
+@task
+def get_im_icon_ele():
+    ele_im = tab.ele('xpath://div[@data-e2e="im-entry"]')
+    if ele_im:
+        return ele_im
+
+@task
+def get_im_icon_red_pot_ele(ele_im:ChromiumElement):
+    ele_im_red_pot = ele_im._find_elements('xpath://div[@class="LFWqFfyH isLight"]', raise_err=False)
+    # 存在私信小红点
+    ele_has_msg = ele_im_red_pot._find_elements('xpath:/div',raise_err=False)
+    return ele_has_msg
+
+
+
+@flow
+def check_has_im_msg_flow(home_url:str):
+    check_home_page(home_url)
+    ele_im = get_im_icon_ele()
+    ele_has_msg = get_im_icon_red_pot_ele(ele_im)
+    return ele_im, ele_has_msg
+
+
+@flow(log_prints=False)
+def im_chat_flow():
+    home_url:str='https://www.douyin.com/user/self'
+    print(f"home_url:{home_url}")
+    ele_im, ele_has_msg = check_has_im_msg_flow(home_url)
+    # 私信弹框的红色小红点
+    ele_list_dlg,ele_pop_red_pot = get_ele_msg_red_pot(ele_im)
+    # get_im_unread_item_data.with_options(result_storage_key=get_object_name_by_time())
+    unread_user_data = get_im_unread_item_user_data(ele_pop_red_pot)
+    user_profile_packet, conversation_detail = conversation.enter_conversation(unread_user_data, ele_list_dlg)
+    chat_history = conversation.get_conversations_history(unread_user_data, conversation_detail)
+    chat_history = conversation.save_unread_msg_to_db(chat_history, unread_user_data['unread_msg_count'])
+    reply_res = conversation.reply_to_user(chat_history, unread_user_data)
+    # 
+
+
+import chat_test
+def main():
+    im_chat_flow(tab=tab)
+
+if __name__ == "__main__":
+    print(os.environ["PREFECT_API_URL"])
+    mg_test = chat_test.Chat()
+    mg_test.send_msg("你好")
+    # auto_douyin()
+    im_chat_flow()
+    # im_chat_flow.with_options(result_storage=os.path.join(OUTPUT, "prefect", "{flow_run.flow_name}.json"))
+    # im_chat_flow()

+ 56 - 0
douyin/chat_test.py

@@ -0,0 +1,56 @@
+import os
+import sys
+sys.path.append(os.path.dirname(os.path.dirname(__file__)))
+
+from conf.config import logger
+from DrissionPage import ChromiumPage, ChromiumOptions
+
+options = ChromiumOptions().set_paths(
+    browser_path=r"C:\Program Files\Google\Chrome\Application\chrome.exe",
+    local_port=9226,
+    user_data_path=r'C:\Users\mrh\AppData\Local\Google\Chrome\User Data')
+mg_page = ChromiumPage(options)
+# page.new_tab("chrome://version/")
+# page.get("https://www.douyin.com/user/self")
+
+class Chat:
+    homepage = 'https://www.douyin.com/user/self'
+    def __init__(self, ) -> None:
+        self.tab_home = mg_page.tab
+        # logger.info(f"{self.tab_home.url}") 
+        self.check_home_page()
+    
+    def send_msg(self, msg):
+        ele_popShadowAnimation = self.tab_home.ele('xpath://div[@class="qbjZBApl popShadowAnimation"]')
+
+
+
+        # 找到输入框
+        # data-e2e="msg-input"
+        ele_input = ele_popShadowAnimation.ele('xpath://div[@data-e2e="msg-input"]')
+        ele_input.click()
+        ele_input.input(msg)
+
+        # 找到发送按钮
+        # span class="e2e-send-msg-btn"
+        ele_send = ele_popShadowAnimation.ele('xpath://span[contains(@class, "e2e-send-msg-btn")]')
+        ele_send.click()
+
+        logger.info(f"回复成功")
+
+
+    def run(self):
+        stranger_msg_count = 0
+        while True:
+            self.check_home_page()
+
+    def check_home_page(self):
+        if "/user/self" not in self.tab_home.url:
+            self.tab_home.get(self.homepage)
+
+def main():
+    mg = Chat()
+    mg.send_msg("测试")
+
+if __name__ == "__main__":
+    main()

A különbségek nem kerülnek megjelenítésre, a fájl túl nagy
+ 207 - 0
douyin/conversation.py


+ 0 - 0
dp/cookies.py → douyin/cookies.py


+ 40 - 0
douyin/user.py

@@ -0,0 +1,40 @@
+from conf.config import logger,OUTPUT
+from database.config import ai_yunying_db
+from dataset import Table
+
+
+class User:
+    def __init__(self, db=ai_yunying_db) -> None:
+        self.db = db
+        self.table_user:Table = self.db.get_table("user")
+
+    # 解析来自这个包请求返回的信息: https://www.douyin.com/aweme/v1/web/user/profile/other/
+    # 返回的包内容如: .\testm\user_profile_response.py
+    def filter_from_user_profile(self, body:dict):
+        user = body['user']
+        save_data = {
+            "uid": user.get('uid'),
+            "nickname": user.get('nickname'),
+            "avatar_medium": user.get("avatar_medium"),
+            "sec_uid": user.get('sec_uid'),
+            "signature": user.get('signature'),
+            "city": user.get('city'),
+            "ip_location": user.get('ip_location'),
+            "province": user.get('province'),
+            "school_name" : user.get('school_name'),
+            "follow_status": user.get('follow_status'),
+            "follower_count" : user.get('follower_count'),
+            "total_favorited" : user.get('total_favorited'),
+            "aweme_count": user.get('aweme_count'),
+        }
+        return save_data
+    def save_user_profile_to_db(self, body:dict):
+        save_data = self.filter_from_user_profile(body)
+        id = self.table_user.insert_ignore(save_data, keys=["uid"])
+        logger.debug(f"插入/存在则忽略用户 id {id}")
+        if id:
+            return self.table_user.find_one(id=id)
+        else:
+            return self.table_user.find_one(uid=save_data['uid'])
+        
+user = User()

+ 0 - 81
dp/chat.py

@@ -1,81 +0,0 @@
-import time
-import page
-from conf.config import logger
-
-class Chat:
-    homepage = 'https://www.douyin.com/user/self'
-    def __init__(self, ) -> None:
-        self.tab_home = page.page.tab
-        # logger.info(f"{self.tab_home.url}") 
-        self.check_home_page()
-    
-    def run(self):
-        stranger_msg_count = 0
-        while True:
-            self.check_home_page()
-            self.tab_home.scroll.to_top()
-            ele_im = self.tab_home.ele('xpath://div[@data-e2e="im-entry"]')
-            ele_im_red_pot = ele_im._find_elements('xpath://div[@class="LFWqFfyH isLight"]', raise_err=False)
-            # 存在私信小红点
-            ele_has_msg = ele_im_red_pot._find_elements('xpath:/div',raise_err=False)
-            if ele_has_msg:
-                logger.info(f"有未读消息 {ele_has_msg.text}")
-                ele_im.click()
-                self.response_frined()
-
-            stranger_msg_count +=1
-            if stranger_msg_count > 10:
-                stranger_msg_count = 0
-                ele_im.click()
-                ele_stranger = ele_im._find_elements("陌生人消息", raise_err=False)
-                if ele_stranger:
-                    logger.info(f"有陌生人消息")
-                    self.response_stranger()
-            time.sleep(1)
-    
-    def response_frined(self):
-        logger.info(f"回复好友")
-        # 找到私信弹出窗口
-        # data-e2e="listDlg-container"
-        ele_list_dlg = self.tab_home.ele('xpath://div[@data-e2e="listDlg-container"]')
-        logger.info(f"ele_list_dlg.html {ele_list_dlg.html}")
-        # 找到未读消息小红点
-        # class="hcPUqxqn"
-        ele_msg_red_pot = ele_list_dlg.ele('xpath://div[@class="hcPUqxqn"]')
-        ele_msg_red_pot.click()
-        # 找到聊天会话框
-        # class="qbjZBApl popShadowAnimation"
-        ele_popShadowAnimation = self.tab_home.ele('xpath://div[@class="qbjZBApl popShadowAnimation"]')
-        # 找到输入框
-        # data-e2e="msg-input"
-        ele_input = ele_popShadowAnimation.ele('xpath://div[@data-e2e="msg-input"]')
-        ele_input.click()
-        ele_input.input("hello")
-
-        # 找到发送按钮
-        # span class="e2e-send-msg-btn"
-        ele_send = ele_popShadowAnimation.ele('xpath://span[contains(@class, "e2e-send-msg-btn")]')
-        ele_send.click()
-        ele_popShadowAnimation.ele('退出会话').click()
-
-        logger.info(f"回复成功")
-
-    def response_stranger(self):
-        logger.info(f"回复陌生人")
-
-    def check_home_page(self):
-        if "/user/self" not in self.tab_home.url:
-            self.tab_home.get(self.homepage)
-
-
-def main():
-    self = Chat()
-    self.run()
-    # ele_im = self.tab_home.ele('xpath://div[@data-e2e="im-entry"]')
-    # logger.info(f"{ele_im.html}")
-    # # <div class="LFWqFfyH isLight">
-    # has_msg_ele = ele_im.ele('xpath://div[@class="LFWqFfyH isLight"]')
-    # logger.info(f"{has_msg_ele.html}")
-
-if __name__ == "__main__":
-    main()

+ 1 - 1
dp/page.py

@@ -12,7 +12,7 @@ from conf.config import logger,PAGE_OUTPUT,INI_PATH,chrome_options,find_edge_pat
 from DrissionPage import ChromiumOptions
 from DrissionPage.common import Settings
 
-Settings.raise_when_ele_not_found=True
+Settings.raise_when_ele_not_found=False
 
 page = ChromiumPage(chrome_options)
 

Nem az összes módosított fájl került megjelenítésre, mert túl sok fájl változott