| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465 |
- import base64
- import datetime
- import json
- import pickle
- import os
- import sys
- sys.path.append(os.path.dirname(os.path.dirname(__file__)))
- from conf.config import OUTPUT,PAGE_OUTPUT
- from database.config import minio_block
- import prefect.runtime
- from dp.page import page
- import jionlp as jio
- import time
- from datetime import datetime
# Module-level tab handle requested from ``page`` for the Douyin "self" user
# page; save_page_info() reads its HTML and screenshot.
# NOTE(review): whether get_tab() opens a new tab or attaches to an existing
# one depends on dp.page -- confirm there.
tab = page.get_tab(url='https://www.douyin.com/user/self')
def get_object_name_by_time():
    """Build a time-based object name ending in a task-name placeholder.

    The current local time is rendered as ``YYYYMMDD/HHMMSS_ffffff`` (a date
    segment, a slash, then the time of day and the microsecond field), and
    the literal text ``-{task_run.task_name}`` is appended unexpanded.

    NOTE(review): the placeholder is presumably filled in later by a
    templating step (e.g. a Prefect result storage key) -- confirm against
    the caller.
    """
    placeholder = '-{task_run.task_name}'
    # ``%f`` yields microseconds; all six digits are kept here (they are not
    # truncated to milliseconds).
    stamp = format(datetime.now(), "%Y%m%d/%H%M%S_%f")
    return stamp + placeholder
def get_result(path: str):
    """Load and unpickle a stored result from ``minio_block``.

    Args:
        path: Location of the stored object. If it starts with
            ``minio_block.basepath`` that prefix is stripped before reading.

    Returns:
        The object obtained by reading the file, parsing it as JSON,
        base64-decoding its ``'data'`` field and unpickling the result.
    """
    prefix = minio_block.basepath
    relative_path = path[len(prefix):] if path.startswith(prefix) else path

    envelope = json.loads(minio_block.read_path(relative_path))
    pickled = base64.b64decode(envelope['data'])

    # NOTE(review): pickle.loads executes arbitrary code embedded in the
    # payload; only use this on storage whose contents are fully trusted.
    return pickle.loads(pickled)
-
def save_page_info(local=False, file_name=''):
    """Archive the current tab's HTML and a screenshot via ``minio_block``.

    Args:
        local: When true, additionally write the HTML to
            ``PAGE_OUTPUT / 'tab-<file_name>.html'`` on the local disk.
        file_name: Base name (without extension) for the stored objects.
            When empty, a ``YYYYMMDD-HHMMSS_ffffff`` timestamp of the
            current local time is used.
    """
    if not file_name:
        file_name = datetime.now().strftime("%Y%m%d-%H%M%S_%f")

    # Read the page source once so the local and uploaded copies are identical.
    html = tab.html

    if local:
        # The context manager guarantees the handle is closed (the previous
        # bare open() leaked it); explicit UTF-8 matches the bytes uploaded
        # below instead of depending on the platform default encoding.
        with open(PAGE_OUTPUT / f'tab-{file_name}.html', 'w', encoding='utf-8') as fh:
            fh.write(html)

    # NOTE(review): the source's indentation was lost; the uploads are assumed
    # to run regardless of ``local`` -- confirm against the original file.
    minio_block.write_path(f'{file_name}.html', html.encode())
    img_bytes = tab.get_screenshot(as_bytes=True)
    minio_block.write_path(f'{file_name}.png', img_bytes)
-
def main():
    """Print the parsed timestamp for every row of ``chat_history_table``.

    For each row, the ``"time"`` value is converted with
    ``cn_time_to_timestamp`` using the row's ``"create_time"`` as the time
    base. A truthy result is also rendered as ``YYYY-MM-DD HH:MM:SS`` local
    time; otherwise the rendered string is ``None``.

    NOTE(review): neither ``chat_history_table`` nor ``cn_time_to_timestamp``
    is defined or imported in the visible part of this file -- confirm where
    they come from.
    """
    for record in chat_history_table:
        time_base = record.get("create_time")
        timestamp = cn_time_to_timestamp(record.get("time"), time_base=time_base)
        str_time = (
            datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')
            if timestamp
            else None
        )
        print(f"timestamp {timestamp} \t\t str {str_time} \t\t time_base {time_base}")
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
|