# t.py — scrape news items from perinim.github.io/projects and print them as JSON.
  1. import asyncio
  2. from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
  3. import requests
  4. from bs4 import BeautifulSoup
  5. import json
  6. from dotenv import load_dotenv
  7. from scrapegraphai.graphs import ScriptCreatorGraph
  8. from scrapegraphai.utils import prettify_exec_info
  9. load_dotenv()
  10. def t_main():
  11. url = "https://perinim.github.io/projects"
  12. response = requests.get(url)
  13. soup = BeautifulSoup(response.content, 'html.parser')
  14. news_list = []
  15. for news in soup.find_all('div', class_='news-item'):
  16. title = news.find('h2').text.strip()
  17. description = news.find('p').text.strip()
  18. news_list.append({
  19. "title": title,
  20. "description": description
  21. })
  22. print(json.dumps(news_list, indent=4))
  23. async def main():
  24. t_main()
  25. # s = '''python\nimport requests\nfrom bs4 import BeautifulSoup\nimport json\n\ndef main():\n url = \"https://perinim.github.io/projects\"\n response = requests.get(url)\n soup = BeautifulSoup(response.content, 'html.parser')\n \n news_list = []\n \n for news in soup.find_all('div', class_='news-item'):\n title = news.find('h2').text.strip()\n description = news.find('p').text.strip()\n news_list.append({\n \"title\": title,\n \"description\": description\n })\n \n print(json.dumps(news_list, indent=4))\n\nif __name__ == \"__main__\":\n main()\n'''
  26. # print(s)
  27. if __name__ == "__main__":
  28. asyncio.run(main())