cookies.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. import asyncio
  2. import os
  3. import random
  4. import re
  5. import time
  6. import os
  7. import sys
  8. sys.path.append(os.path.dirname(os.path.dirname(__file__)))
  9. from DrissionPage import ChromiumPage
  10. from DrissionPage import ChromiumOptions
  11. from conf.config import logger,PAGE_OUTPUT,INI_PATH,chrome_options,db,USER_DATA,OUTPUT
  12. from DrissionPage import ChromiumOptions
  13. from DrissionPage.common import Settings
  14. from faker import Faker
  15. import DrissionPage
  16. from fake_useragent import UserAgent
  17. from dataset import Table
# Module-level handle to the 'cookie' table of the project database `db`;
# get_from_table() reads rows through it.
table:Table = db['cookie']
  19. def get_browser_fake_info():
  20. # https://github.com/joke2k/faker/wiki
  21. fake = Faker()
  22. while True:
  23. user_agent = fake.user_agent()
  24. mac_platform_token = fake.mac_platform_token()
  25. if "Windows" in user_agent:
  26. break
  27. return user_agent
# Module-level browser handle. Nothing in this file rebinds it:
# gen_douyin_cookies() assigns its own *local* `page`, so this stays None
# unless code outside this file sets it. try_gen_douyin_cookies() reads it.
page = None
# https://dataset.readthedocs.io/en/latest/api.html
  30. def create_ua_header():
  31. ua = UserAgent(os=["windows", "macos"])
  32. res = ua.getRandom
  33. logger.info(f"{ua}")
  34. version = res['version']
  35. vnum = int(version)
  36. header = {
  37. "Sec-Ch-Ua": f'";Not A Brand";v="99", "Chromium";v="{vnum}"',
  38. "Sec-Ch-Ua-Platform": "Windows" if 'win' in res['os'] else "macOS",
  39. "User-Agent": res['useragent'],
  40. }
  41. # logger.info(f"{res}")
  42. # logger.info(f"{header}")
  43. return header
  44. def gen_douyin_cookies():
  45. # header = {'Sec-Ch-Ua': '";Not A Brand";v="99", "Chromium";v="122"', 'Sec-Ch-Ua-Platform': 'Windows', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Config/92.2.2788.20'}
  46. header = create_ua_header()
  47. logger.info(f"header {header}")
  48. chrome_options.set_user_data_path(USER_DATA + '1')
  49. page = ChromiumPage(chrome_options)
  50. try:
  51. page.set.cookies.clear()
  52. except Exception as e:
  53. logger.exception(f"{e}")
  54. page.quit()
  55. return
  56. page.set.load_mode.normal()
  57. page.set.headers(headers=header)
  58. logger.info(f"address {chrome_options.address}")
  59. logger.info(f"user_data_path {chrome_options.user_data_path}")
  60. logger.info(f"start '{page._chromium_options._browser_path}'")
  61. logger.info(f"process_id {page.process_id}")
  62. # page.get("edge://version/")
  63. url="https://www.douyin.com/"
  64. # tab = page.new_tab()
  65. tab = page
  66. retry = 3
  67. while retry:
  68. try:
  69. logger.info(f"start listen")
  70. tab.listen.start("www.douyin.com", method="GET")
  71. tab.get(url)
  72. i = 0
  73. for packet in tab.listen.steps(timeout=5):
  74. if not packet.url.startswith('https'):
  75. continue
  76. # logger.info(f"{packet.url} {packet.request.headers}")
  77. cookie = packet.request.extra_info.headers["cookie"]
  78. logger.debug(f"cookie {cookie}")
  79. # page.quit()
  80. if len(cookie) > 1600:
  81. ret = {
  82. "cookies": cookie,
  83. "user_agent": header['User-Agent'],
  84. "update_time": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())),
  85. "create_time": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(time.time())),
  86. }
  87. logger.debug(f"ret {ret}")
  88. page.quit()
  89. return ret
  90. i += 1
  91. if i == 5:
  92. break
  93. # res = tab.listen.wait(1, timeout=5, raise_err=True)
  94. # logger.debug(f"res {res.request.headers}")
  95. # logger.debug(f"res.request.extra_info.headers {res.request.extra_info.headers}")
  96. except DrissionPage.errors.WaitTimeoutError as e:
  97. logger.error(f"{e}")
  98. except Exception as e:
  99. logger.exception(e)
  100. tab.listen.clear()
  101. retry -= 1
  102. logger.info(f"retry {retry}")
  103. page.quit()
  104. def save_to_db(data):
  105. table = db['cookie']
  106. table.insert(data)
  107. def try_gen_douyin_cookies():
  108. while True:
  109. try:
  110. ret = gen_douyin_cookies()
  111. # logger.info(f"ret {ret}")
  112. save_to_db(ret)
  113. break
  114. # Ctrl+c cancel_error
  115. except KeyboardInterrupt:
  116. logger.info("KeyboardInterrupt")
  117. page.quit()
  118. sys.exit(0)
  119. except DrissionPage.errors.WaitTimeoutError as e:
  120. logger.error(f"{e}")
  121. except Exception as e:
  122. logger.exception(e)
  123. def get_from_table():
  124. count = table.count()
  125. # random choise
  126. if count > 0:
  127. index = random.randint(0, count-1)
  128. row = table.find_one(id=index)
  129. # logger.info(f"{row['id']}")
  130. # logger.info(f"row {row}")
  131. return row
  132. else:
  133. logger.info("no cookie")
  134. return None
def main():
    """Script entry point: fetch one stored cookie row via get_from_table().

    The returned row is discarded. The commented-out calls below are
    alternative manual actions that were left in the file for reference.
    """
    # ua = create_ua_header()
    # gen_douyin_cookies()
    # try_gen_douyin_cookies()
    get_from_table()
    # table.create_column('key')
    # table.delete(id=6)


# Run main() only when this file is executed as a script.
if __name__ == "__main__":
    main()