base.py 2.0 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465
  1. import base64
  2. import datetime
  3. import json
  4. import pickle
  5. import os
  6. import sys
  7. sys.path.append(os.path.dirname(os.path.dirname(__file__)))
  8. from conf.config import OUTPUT,PAGE_OUTPUT
  9. from database.config import minio_block
  10. import prefect.runtime
  11. from dp.page import page
  12. import jionlp as jio
  13. import time
  14. from datetime import datetime
# Module-level browser tab, created at import time by asking `page` for a tab
# on the Douyin "self" profile URL. save_page_info() reads its HTML and takes
# its screenshot.
# NOTE(review): this runs as an import side effect -- confirm every importer
# of this module expects that.
tab=page.get_tab(url='https://www.douyin.com/user/self')
  16. def get_object_name_by_time():
  17. # 获取时分秒毫秒,并且符合路径的格式,如 023213_123.json
  18. now = datetime.now()
  19. # 格式化时间:小时、分钟、秒、毫秒
  20. formatted_time = now.strftime("%Y%m%d/%H%M%S_%f") # %f 提供了微秒,所以我们取前三个数字作为毫秒
  21. return formatted_time + '-{task_run.task_name}'
  22. def get_result(path:str):
  23. if path.startswith(minio_block.basepath):
  24. path = path[len(minio_block.basepath):]
  25. bytes = minio_block.read_path(path)
  26. json_data = json.loads(bytes)
  27. base64_data = json_data['data']
  28. decoded_data = base64.b64decode(base64_data)
  29. result = pickle.loads(decoded_data)
  30. return result
  31. def save_page_info(local=False, file_name=''):
  32. if not file_name:
  33. file_name = datetime.now().strftime("%Y%m%d-%H%M%S_%f")
  34. if local:
  35. f = open(PAGE_OUTPUT/f'tab-{file_name}.html', 'w')
  36. f.write(tab.html)
  37. # tab.html
  38. minio_block.write_path(f'{file_name}.html',tab.html.encode() )
  39. img_bytes = tab.get_screenshot(as_bytes=True)
  40. minio_block.write_path(f'{file_name}.png',img_bytes )
  41. def main():
  42. for row in chat_history_table:
  43. time_base = row.get("create_time")
  44. cn_time = row.get("time")
  45. timestamp = cn_time_to_timestamp(cn_time, time_base=time_base)
  46. if timestamp:
  47. str_time = datetime.strftime(datetime.fromtimestamp(timestamp), '%Y-%m-%d %H:%M:%S')
  48. else:
  49. str_time = None
  50. print(f"timestamp {timestamp} \t\t str {str_time} \t\t time_base {time_base}")
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()