"""Crawl a Wikipedia article with crawl4ai and dump the cached CrawlResult to files."""

import asyncio

from crawl4ai import *

from get_article_info import get_rearch_result_links
from mylib.base import OUTPUT_DIR, load_from_pickle, save_all_result, save_to_file


async def main():
    """Crawl the Aciphylla Wikipedia page and persist the full crawl result.

    Uses the crawl4ai cache (CacheMode.ENABLED) so repeated runs reuse the
    cached fetch. Output is written under OUTPUT_DIR / "Aciphylla".
    """
    async with AsyncWebCrawler() as crawler:
        result = await crawler.arun(
            url="https://en.wikipedia.org/wiki/Aciphylla",
            cache_mode=CacheMode.ENABLED,
        )
        # e.g. output/Aciphylla/
        save_all_result(result, OUTPUT_DIR / "Aciphylla")


def result_dump(output_dir):
    """Load the pickled CrawlResult from *output_dir* and write each markdown
    variant, plus full JSON dumps, to separate files in that directory.

    Expects ``output_dir / "result.pickle"`` to exist (written by a prior
    run of ``main``).
    """
    result: CrawlResult = load_from_pickle(output_dir / "result.pickle")

    # One file per markdown_v2 field.
    md_v2 = result.markdown_v2
    save_to_file(md_v2.raw_markdown, output_dir / "markdown_v2_raw_markdown.md")
    save_to_file(md_v2.references_markdown, output_dir / "markdown_v2_references_markdown.md")
    save_to_file(md_v2.markdown_with_citations, output_dir / "markdown_v2_markdown_with_citations.md")
    save_to_file(md_v2.fit_markdown, output_dir / "markdown_v2_fit_markdown.md")
    # NOTE(review): HTML content saved under a .md extension — confirm intended.
    save_to_file(md_v2.fit_html, output_dir / "markdown_v2_fit_html.md")

    save_to_file(result.markdown, output_dir / "markdown.md")
    save_to_file(result.markdown_v2, output_dir / "markdown_v2.md")

    # Full pydantic JSON serializations for later inspection.
    save_to_file(result.model_dump_json(), output_dir / "result.json")
    save_to_file(result.markdown.model_dump_json(), output_dir / "markdown.json")


if __name__ == "__main__":
    # Toggle: run main() first to crawl and populate the output directory,
    # then result_dump() to expand the pickled result into individual files.
    # asyncio.run(main())
    result_dump(OUTPUT_DIR / "Aciphylla")