base.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748
  1. import base64
  2. import datetime
  3. import json
  4. import pickle
  5. import os
  6. import sys
  7. sys.path.append(os.path.dirname(os.path.dirname(__file__)))
  8. from conf.config import OUTPUT,PAGE_OUTPUT
  9. from database.config import minio_block
  10. import prefect.runtime
  11. from pyunit_time import Time
  12. from dp.page import page
  13. import time
  14. from datetime import datetime
# Module-level tab handle, created once when this module is imported by
# calling ``page.get_tab`` with the Douyin "self" profile URL.
# ``save_page_info`` reads the HTML and the screenshot from this object.
# NOTE(review): ``page`` looks like a DrissionPage browser object and
# ``get_tab(url=...)`` presumably looks up an already-open tab -- confirm
# against dp/page.py.
tab=page.get_tab(url='https://www.douyin.com/user/self')
  16. def get_object_name_by_time():
  17. # 获取时分秒毫秒,并且符合路径的格式,如 023213_123.json
  18. now = datetime.now()
  19. # 格式化时间:小时、分钟、秒、毫秒
  20. formatted_time = now.strftime("%Y%m%d/%H%M%S_%f") # %f 提供了微秒,所以我们取前三个数字作为毫秒
  21. return formatted_time + '-{task_run.task_name}'
  22. def get_result(path:str):
  23. if path.startswith(minio_block.basepath):
  24. path = path[len(minio_block.basepath):]
  25. bytes = minio_block.read_path(path)
  26. json_data = json.loads(bytes)
  27. base64_data = json_data['data']
  28. decoded_data = base64.b64decode(base64_data)
  29. result = pickle.loads(decoded_data)
  30. return result
  31. def save_page_info(local=False, file_name=''):
  32. if not file_name:
  33. file_name = datetime.now().strftime("%Y%m%d-%H%M%S_%f")
  34. if local:
  35. f = open(PAGE_OUTPUT/f'tab-{file_name}.html', 'w')
  36. f.write(tab.html)
  37. # tab.html
  38. minio_block.write_path(f'{file_name}.html',tab.html.encode() )
  39. img_bytes = tab.get_screenshot(as_bytes=True)
  40. minio_block.write_path(f'{file_name}.png',img_bytes )