| 1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071 |
- import sys
- import os
- import pathlib
- from DrissionPage import ChromiumOptions
- import dataset
- from loguru import logger
# Address of the browser debugging endpoint; joined as "HOST:PORT" when the
# default browser configuration is generated further down in this module.
HOST = 'localhost'
PORT = 9226

# Project layout, resolved from the parent of the directory holding this file.
WORK_DIR = pathlib.Path(__file__).parent.parent
CONF_DIR = WORK_DIR / 'conf'
OUTPUT = WORK_DIR / 'output'
PAGE_OUTPUT = OUTPUT / 'page'

# "-c <path>" as the first two command-line arguments overrides the default
# location of the browser ini file.
if len(sys.argv) >= 3 and sys.argv[1] == "-c":
    INI_PATH = sys.argv[2]
else:
    INI_PATH = CONF_DIR / 'dp_configs.ini'

# Create the output directories up front. exist_ok avoids the
# check-then-create race, and parents=True keeps this working even when an
# intermediate directory is missing.
OUTPUT.mkdir(parents=True, exist_ok=True)
PAGE_OUTPUT.mkdir(parents=True, exist_ok=True)

# One format shared by the console sink and the file sink.
# Alternative format that also prints the module name:
# '<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>'
_LOG_FORMAT = (
    '<green>{time:YYYY-MM-DD HH:mm:ss}</green> | '
    '<level>{level: <8}</level> | '
    '<cyan>{file}</cyan>:<cyan>{line}</cyan> :<cyan>{function}</cyan> - {message}'
)

# Drop the handlers registered so far, then log INFO and above to stderr and
# DEBUG and above to OUTPUT/all.log.
logger.remove()
logger.add(sys.stderr, level="INFO", format=_LOG_FORMAT)
logger.add(os.path.join(OUTPUT, "all.log"), level="DEBUG", format=_LOG_FORMAT)

logger.debug(f"WORK_DIR {WORK_DIR}")
logger.debug(f"INI_PATH {INI_PATH}")

# SQLite database stored inside the output directory.
db = dataset.connect(f'sqlite:///{OUTPUT}/douyin.db')
def find_edge_path_in_registry():
    r"""Return the Microsoft Edge executable path stored in the Windows registry.

    Reads the default value of
    ``HKEY_LOCAL_MACHINE\SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\msedge.exe``.

    Returns:
        The value stored under that key, or ``None`` when the key cannot be
        opened or read, or when the ``winreg`` module is unavailable (any
        non-Windows platform).
    """
    try:
        import winreg as reg
    except ImportError:
        # winreg only exists on Windows; elsewhere there is nothing to query.
        return None

    try:
        # The context manager closes the key even if the query itself raises.
        with reg.OpenKey(
            reg.HKEY_LOCAL_MACHINE,
            r"SOFTWARE\Microsoft\Windows\CurrentVersion\App Paths\msedge.exe",
        ) as key:
            return reg.QueryValueEx(key, "")[0]
    except OSError:
        # OSError is the portable spelling of WindowsError (an alias on
        # Windows); a missing key simply means "not found".
        return None
# Browser profile directory written into the generated configuration.
USER_DATA = OUTPUT / 'userData_ai-yunying'

# A substring test ('win' in sys.platform) would also match 'darwin' and
# 'cygwin', so test the prefix instead.
_IS_WINDOWS = sys.platform.startswith('win')

# In theory the browser path never needs to be set by hand, because
# DrissionPage looks for the browser in its default locations on its own; the
# Windows-specific handling below could therefore be dropped.
if _IS_WINDOWS:
    # The registry lookup is Windows-only, so it must not run elsewhere.
    path = find_edge_path_in_registry()
    logger.debug(f"find_edge browser path: {path}")

    if not os.path.exists(INI_PATH):
        # NOTE(review): configs_to_here is not used in the visible code; the
        # import is kept only so this module's namespace stays unchanged.
        from DrissionPage.common import configs_to_here

        # Generate the default configuration file.
        chrome_options = ChromiumOptions(False, None)
        chrome_options.set_address(f"{HOST}:{PORT}")
        if path:
            # Only record a browser path that was actually found; otherwise
            # leave the option unset so the library's own lookup applies.
            chrome_options.set_browser_path(path)
        chrome_options.set_user_data_path(USER_DATA)
        chrome_options.save(INI_PATH)
    else:
        chrome_options = ChromiumOptions(True, ini_path=INI_PATH)
        logger.debug(f"load init {INI_PATH}")
    logger.debug(f"chrome_options.browser_path {chrome_options.browser_path}")
else:
    # Linux and every other non-Windows platform load the ini file directly,
    # so chrome_options is always defined after this point.
    chrome_options = ChromiumOptions(ini_path=INI_PATH)

# import consul_srv_client
# ip,port = consul_srv_client.get_srv("prefect.service.consul")
prefect_api = "http://10.0.0.1:8612/api"
# Set the environment variable before prefect is imported.
os.environ["PREFECT_API_URL"] = prefect_api

G4F_API = "http://pc:8003/"
|