t.py

import asyncio
from urllib.parse import quote_plus

from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
from mylib.base import OUTPUT_DIR


async def main():
    # 1) Reference your persistent data directory
    browser_config = BrowserConfig(
        headless=False,  # set True for unattended runs
        verbose=True,
        use_managed_browser=False,  # crawl4ai forces this to True when use_persistent_context=True
        use_persistent_context=True,  # persistent browser strategy: the profile survives across runs
        browser_type="chromium",
        user_data_dir=str(OUTPUT_DIR / "user_data_dir2"),
    )
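
    # First run: keep headless=False and log in manually in the opened browser;
    # cookies and local storage persist under user_data_dir, so later runs
    # reuse the authenticated profile.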

    # 2) Standard crawl config
    search_key = "Acalypha malabarica essential oil"
    start = 30  # Google results offset: 30 = fourth page at 10 results per page
    url = f"https://www.google.com/search?q={quote_plus(search_key)}&start={start}"
    print(f"search url: {url}")
    crawl_config = CrawlerRunConfig(
        wait_for="css:.logged-in-content",  # placeholder selector; use one that actually appears on the target page
    )
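    # Note: crawl4ai's wait_for accepts "css:<selector>" or "js:<expression>"
    # conditions; the crawl waits until the condition holds or the wait times out.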

    async with AsyncWebCrawler(config=browser_config) as crawler:
        # Alternative to the context manager: manage the lifecycle manually
        # crawler = AsyncWebCrawler(config=browser_config)
        # await crawler.start()
        result = await crawler.arun(
            url=url,
            config=crawl_config,
        )
        # save_to_pickle(result, OUTPUT_DIR / f"{search_key}.pickle")
        return result


if __name__ == "__main__":
    result = asyncio.run(main())
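    # Minimal result handling: a sketch assuming crawl4ai's CrawlResult exposes
    # `success`, `markdown`, and `error_message` (verify against your version).
    if result.success:
        print(str(result.markdown)[:500])  # preview the extracted markdown
    else:
        print(f"crawl failed: {result.error_message}")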