| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748 |
- import base64
- import datetime
- import json
- import pickle
- import os
- import sys
- sys.path.append(os.path.dirname(os.path.dirname(__file__)))
- from conf.config import OUTPUT,PAGE_OUTPUT
- from database.config import minio_block
- import prefect.runtime
- from pyunit_time import Time
- from dp.page import page
- import time
- from datetime import datetime
# Module-level browser tab handle, looked up on the shared `page` object by
# URL when this module is imported.
# NOTE(review): presumably this selects an already-open tab showing the
# logged-in user's Douyin profile -- confirm against the DrissionPage docs.
tab = page.get_tab(url='https://www.douyin.com/user/self')
def get_object_name_by_time():
    """Build a time-based object key that ends in a task-name placeholder.

    The key has the form ``YYYYMMDD/HHMMSS_ffffff`` taken from the current
    naive ``datetime.now()`` value, followed by the literal text
    ``-{task_run.task_name}``.

    ``%f`` is the full six-digit microsecond field; it is not shortened to
    milliseconds. The braces in the suffix are returned verbatim and are not
    interpolated by this function.

    Returns:
        str: The formatted key, e.g. ``20240131/023213_123456-{task_run.task_name}``.
    """
    # Date directory, then time of day with microsecond resolution.
    timestamp = datetime.now().strftime("%Y%m%d/%H%M%S_%f")
    placeholder = '-{task_run.task_name}'
    return timestamp + placeholder
def get_result(path: str):
    """Read a stored result through ``minio_block`` and unpickle it.

    Args:
        path: Location of the stored document. If it starts with
            ``minio_block.basepath`` that prefix is removed before reading.

    Returns:
        The object obtained by unpickling the base64-decoded ``'data'`` field
        of the JSON document read from ``path``.
    """
    prefix = minio_block.basepath
    if path.startswith(prefix):
        # Strip the storage base path so only the remainder is passed on.
        path = path[len(prefix):]

    envelope = json.loads(minio_block.read_path(path))
    pickled = base64.b64decode(envelope['data'])
    # NOTE(security): pickle.loads can execute arbitrary code; only use this
    # on storage whose contents are fully trusted.
    return pickle.loads(pickled)
-
def save_page_info(local=False, file_name=''):
    """Save the current page of ``tab`` as HTML plus a screenshot.

    The HTML is uploaded through ``minio_block`` as ``<file_name>.html`` and a
    screenshot as ``<file_name>.png``. Optionally the HTML is also written to
    a local file.

    Args:
        local: When true, additionally write the HTML to
            ``PAGE_OUTPUT / 'tab-<file_name>.html'``.
        file_name: Base name (without extension) for the saved objects. When
            empty, a ``YYYYMMDD-HHMMSS_ffffff`` timestamp is used.
    """
    if not file_name:
        file_name = datetime.now().strftime("%Y%m%d-%H%M%S_%f")

    # Read the page source once so the local and uploaded copies are the
    # same snapshot.
    html = tab.html

    if local:
        # Context manager guarantees the file is flushed and closed; explicit
        # UTF-8 matches the bytes uploaded below and avoids encode errors on
        # platforms whose default encoding cannot represent the page text.
        local_path = PAGE_OUTPUT / f'tab-{file_name}.html'
        with open(local_path, 'w', encoding='utf-8') as f:
            f.write(html)

    # NOTE(review): the uploads are treated as unconditional (not part of the
    # ``local`` branch) -- confirm against the intended behavior.
    minio_block.write_path(f'{file_name}.html', html.encode())
    img_bytes = tab.get_screenshot(as_bytes=True)
    minio_block.write_path(f'{file_name}.png', img_bytes)
|