import asyncio
import json
import os
import socket
import sys
from pathlib import Path

import requests
import yaml
from bs4 import BeautifulSoup
from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
from dotenv import load_dotenv

# Make the project root importable and point the worker at the local SQLite copy
# of the search-results database before importing project modules.
sys.path.append(str(Path(r'G:\code\upwork\zhang_crawl_bio')))
os.environ['DB_URL'] = 'sqlite:///' + str(Path(r'G:\code\upwork\zhang_crawl_bio\output\search_results copy.db'))

from worker.celery.search_client import get_uncompleted_keywords
from mylib.logu import logger

# load_dotenv()
config_path = Path(f"config/pc_configs/{'pc1'}.yaml")


def t_main():
    # Fetch and print the keywords that have not been crawled yet.
    res = get_uncompleted_keywords()
    print(res)
    print(len(res))


async def main():
    t_main()
    return


# Commented-out scraping snippet kept for reference:
# s = '''python\nimport requests\nfrom bs4 import BeautifulSoup\nimport json\n\ndef main():\n url = \"https://perinim.github.io/projects\"\n response = requests.get(url)\n soup = BeautifulSoup(response.content, 'html.parser')\n \n news_list = []\n \n for news in soup.find_all('div', class_='news-item'):\n title = news.find('h2').text.strip()\n description = news.find('p').text.strip()\n news_list.append({\n \"title\": title,\n \"description\": description\n })\n \n print(json.dumps(news_list, indent=4))\n\nif __name__ == \"__main__\":\n main()\n'''
# print(s)


if __name__ == "__main__":
    asyncio.run(main())
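
# ---------------------------------------------------------------------------
# Note: the crawl4ai imports above are currently unused by main(). The block
# below is a minimal, commented-out sketch of how AsyncWebCrawler is typically
# driven (based on the standard crawl4ai async context-manager pattern). The
# URL and config values are illustrative assumptions, not part of this project.
#
# async def crawl_example():
#     browser_config = BrowserConfig(headless=True)   # assumed browser options
#     run_config = CrawlerRunConfig()                  # default run settings
#     async with AsyncWebCrawler(config=browser_config) as crawler:
#         result = await crawler.arun(url="https://example.com", config=run_config)
#         print(result.markdown)
#
# # Could be wired into main() instead of t_main(), e.g.:
# # asyncio.run(crawl_example())
# ---------------------------------------------------------------------------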