```python
import asyncio
from urllib.parse import quote_plus

from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
from mylib.base import OUTPUT_DIR


async def main():
    # 1) Point the browser at a persistent profile directory
    browser_config = BrowserConfig(
        headless=False,  # set to True for automated runs
        verbose=True,
        use_persistent_context=True,  # enables the persistent-browser strategy
        browser_type="chromium",
        user_data_dir=str(OUTPUT_DIR / "user_data_dir2"),
    )

    # 2) Standard crawl config
    search_key = "Acalypha malabarica essential oil"
    start = 30  # Google paginates results 10 per page
    url = f"https://www.google.com/search?q={quote_plus(search_key)}&start={start}"
    print(f"search url: {url}")
    crawl_config = CrawlerRunConfig(
        wait_for="css:#search",  # Google's results container
    )

    async with AsyncWebCrawler(config=browser_config) as crawler:
        # Equivalent explicit form:
        # crawler = AsyncWebCrawler(config=browser_config)
        # await crawler.start()
        result = await crawler.arun(url=url, config=crawl_config)
        # save_to_pickle(result, OUTPUT_DIR / f"{search_key}.pickle")
        return result


if __name__ == "__main__":
    asyncio.run(main())
```
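Because the profile in `user_data_dir` persists across `arun()` calls, a single session can walk Google's pagination by bumping `start` in steps of 10. Here is a minimal sketch of that loop, assuming the same crawl4ai API as above; the `crawl_pages` helper, the literal `user_data_dir` path, and the `css:#search` selector for Google's results container are assumptions, not part of the original listing.

```python
import asyncio
from urllib.parse import quote_plus

from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig


async def crawl_pages(search_key: str, pages: int = 3) -> list:
    # Same persistent-profile setup as the listing above (path is a placeholder).
    browser_config = BrowserConfig(
        headless=True,  # automated run, no visible window
        use_persistent_context=True,
        browser_type="chromium",
        user_data_dir="./user_data_dir2",  # hypothetical path; reuse OUTPUT_DIR in practice
    )
    crawl_config = CrawlerRunConfig(wait_for="css:#search")

    results = []
    async with AsyncWebCrawler(config=browser_config) as crawler:
        for page in range(pages):
            url = (
                "https://www.google.com/search"
                f"?q={quote_plus(search_key)}&start={page * 10}"
            )
            # One browser context serves every request, so cookies and any
            # logged-in state carry over between pages.
            results.append(await crawler.arun(url=url, config=crawl_config))
    return results


if __name__ == "__main__":
    for r in asyncio.run(crawl_pages("Acalypha malabarica essential oil")):
        print(r.url, r.success)
```

Keeping the loop inside one `async with` block is the point of the design: the browser context is created once, reused for every page, and only torn down at the end.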