"""Helpers for grabbing a browser tab and persisting its HTML / screenshot.

Snapshots can be written to the local output directory and/or uploaded to
MinIO through the project's ``S3`` wrapper. The module also contains a helper
that loads a pickled result from the configured MinIO block.
"""
import base64
import datetime
import json
import os
import pathlib
import pickle
import sys
import time
from typing import Optional

# Make the project root importable before any project-local import runs.
sys.path.append(os.path.dirname(os.path.dirname(__file__)))

# NOTE: the relative order of the project / third-party imports below is kept
# as in the original file in case any of them has import-time side effects.
from conf.config import OUTPUT, PAGE_OUTPUT, logger
from database.config import minio_block
import prefect.runtime
from dp.page import page
# Must come after ``import datetime``: from here on the name ``datetime``
# refers to the class, not the module.
from datetime import datetime
from prefect import runtime
from database.config import minio_client
from database.s3 import S3Object, S3

HOME_URL = 'https://www.douyin.com/user/self'

# Module-level tab resolved at import time; used by ``save_html_to_s3``.
tab = page.get_tab(url=HOME_URL)


def get_tab(tab_id: Optional[str] = None):
    """Return the tab to operate on.

    Resolution order:
      1. the explicit ``tab_id`` argument, if truthy;
      2. the ``tab_id`` parameter of the current Prefect flow run, if truthy;
      3. ``page.tab`` as the fallback.

    Args:
        tab_id: Identifier passed to ``page.get_tab``; optional.

    Returns:
        Whatever ``page.get_tab(tab_id)`` or ``page.tab`` yields.
    """
    # The flow-run parameters are consulted only when no explicit id is given.
    tab_id = tab_id or runtime.flow_run.parameters.get('tab_id', None)
    if tab_id:
        return page.get_tab(tab_id)
    return page.tab


def get_object_name_by_time() -> str:
    """Build a time-based object-name template.

    The result looks like ``20240131/023213_123456-{task_run.task_name}``:
    a date directory, then hours/minutes/seconds, an underscore and the
    microseconds (``%f`` is six digits and is not truncated here).

    The trailing ``{task_run.task_name}`` is returned literally, not
    interpolated -- presumably it is a placeholder resolved later by the
    consumer of this template (TODO confirm against the caller).

    Returns:
        The formatted template string.
    """
    now = datetime.now()
    formatted_time = now.strftime("%Y%m%d/%H%M%S_%f")
    return formatted_time + '-{task_run.task_name}'


def get_result(path: str):
    """Load and unpickle a stored result from the MinIO block.

    The object at ``path`` is expected to be a JSON document whose ``data``
    field holds a base64-encoded pickle payload.

    Args:
        path: Object path; a leading ``minio_block.basepath`` is stripped
            before the read.

    Returns:
        The unpickled Python object.
    """
    if path.startswith(minio_block.basepath):
        path = path[len(minio_block.basepath):]
    raw = minio_block.read_path(path)
    payload = json.loads(raw)
    decoded_data = base64.b64decode(payload['data'])
    # SECURITY: pickle.loads executes arbitrary code from its input. Only use
    # this on objects written by trusted producers.
    return pickle.loads(decoded_data)


def save_html_to_s3(file_name):
    """Dump the module-level tab's HTML to disk and upload it to MinIO.

    The HTML is written under the hard-coded local page directory, then
    uploaded to bucket ``public`` with an object name of
    ``/md/ai-yunying/<time-based name>-<file_name>``.

    Args:
        file_name: File name used both for the local file and as the suffix
            of the uploaded object name.
    """
    page_dir = pathlib.Path(r'I:\code\ai-yunying\live-online-people\output\page\\')
    file_path = page_dir / file_name
    # Close (and therefore flush) the file before uploading it; otherwise the
    # upload could see an empty or partially written file.
    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(tab.html)

    s3minio = S3(bucket='public', client=minio_client)
    base_path = '/md/ai-yunying/'
    obj_name = base_path + s3minio.get_object_name_by_time() + '-' + file_name
    # Upload exactly the file that was just written.
    res = s3minio.fput(str(file_path), obj_name)
    logger.info(f"{res.bucket_name } {res.object_name} {res._http_headers}")


def save_page_info(file_name='', tab_id=None, local=True, s3=False):
    """Save a tab's HTML and screenshot locally and/or to MinIO.

    Args:
        file_name: Base name (without extension) for the ``.html`` and
            ``.png`` outputs.
        tab_id: Optional tab identifier, resolved through ``get_tab``.
        local: When true, write both files under ``OUTPUT/<YYYY-MM-DD>/``.
        s3: When true, upload both payloads to bucket ``swl`` as
            ``<file_name>.html`` and ``<file_name>.png``.

    Returns:
        The relative date directory (``pathlib.Path('YYYY-MM-DD')``) used for
        the local output.
    """
    base_time_dir = pathlib.Path(datetime.now().strftime("%Y-%m-%d"))
    tab = get_tab(tab_id)
    png = tab.get_screenshot(as_bytes=True)
    # Read the HTML once so the local and S3 copies are the same snapshot.
    html = tab.html

    if local:
        save_dir = OUTPUT / base_time_dir
        logger.info(f"{save_dir}")
        # exist_ok avoids the check-then-create race of exists() + makedirs().
        os.makedirs(save_dir, exist_ok=True)
        # Explicit UTF-8: the platform default encoding may be unable to
        # represent every character found in the page.
        with open(save_dir / f'{file_name}.html', 'w', encoding='utf-8') as f:
            f.write(html)
        with open(save_dir / f'{file_name}.png', 'wb') as f:
            f.write(png)

    if s3:
        s3minio = S3(bucket='swl', client=minio_client)
        # NOTE: object names are just "<file_name>.<ext>"; no date or
        # time-based prefix is added here.
        res = s3minio.put(html, f'{file_name}.html')
        logger.info(f"{res.bucket_name } {res.object_name} {res._http_headers}")
        s3minio.put(png, f'{file_name}.png')

    return base_time_dir


def main():
    """Manual entry point: snapshot one hard-coded tab to the local output."""
    save_page_info(file_name='点击陌生人对话框', tab_id='A017888A62FE53FAD9D85ED2662FEA34')


if __name__ == "__main__":
    main()