import json
from pathlib import Path

output_dir = Path("output")


# Read the "external" key from output/links.json and return it as a list
def read_links_from_json(file_path):
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)
    return data.get("external", [])


# Keep only links whose "base_domain" value does not contain "google"
def filter_links(links):
    # .get() guards against entries that lack a "base_domain" key
    return [link for link in links if "google" not in link.get("base_domain", "")]


def get_search_result_links(file_path):
    links = read_links_from_json(file_path)
    return filter_links(links)


def main():
    filtered_links = get_search_result_links(output_dir / "links.json")
    print(filtered_links)
    print(len(filtered_links))


if __name__ == "__main__":
    main()
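
# ---------------------------------------------------------------------------
# A minimal sketch of how output/links.json might be produced with crawl4ai,
# assuming its AsyncWebCrawler API; the URL here is a placeholder, and
# result.links is expected to be a dict with "internal" and "external" lists
# whose entries carry a "base_domain" field (exact fields may vary by version):
#
#     import asyncio
#     from crawl4ai import AsyncWebCrawler
#
#     async def crawl(url):
#         async with AsyncWebCrawler() as crawler:
#             result = await crawler.arun(url=url)
#             output_dir.mkdir(exist_ok=True)
#             with open(output_dir / "links.json", "w", encoding="utf-8") as f:
#                 json.dump(result.links, f, ensure_ascii=False, indent=2)
#
#     asyncio.run(crawl("https://www.google.com/search?q=example"))
#
# Given input of the assumed shape, e.g.
#
#     {"external": [{"href": "https://example.com", "base_domain": "example.com"},
#                   {"href": "https://maps.google.com", "base_domain": "google.com"}]}
#
# get_search_result_links keeps only the example.com entry.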