| 123456789101112131415161718192021222324252627282930313233 |
- import asyncio
- from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
- import requests
- from bs4 import BeautifulSoup
- import json
- from dotenv import load_dotenv
- from scrapegraphai.graphs import ScriptCreatorGraph
- from scrapegraphai.utils import prettify_exec_info
- load_dotenv()
def t_main(url: str = "https://perinim.github.io/projects") -> list:
    """Scrape news items from *url* and print them as a JSON array.

    Each ``div.news-item`` element is expected to contain an ``h2`` title
    and a ``p`` description. Items missing either tag are skipped instead
    of raising ``AttributeError`` on ``None``.

    Args:
        url: Page to scrape. Defaults to the original hard-coded URL, so
            existing callers are unaffected.

    Returns:
        The list of ``{"title": ..., "description": ...}`` dicts (also
        printed to stdout for backward compatibility).

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-2xx HTTP status.
    """
    # Timeout prevents hanging forever on a stalled connection;
    # raise_for_status surfaces HTTP errors instead of parsing an error page.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    news_list = []
    for news in soup.find_all('div', class_='news-item'):
        title_tag = news.find('h2')
        desc_tag = news.find('p')
        if title_tag is None or desc_tag is None:
            # Malformed item — skip rather than crash on `.text` of None.
            continue
        news_list.append({
            "title": title_tag.text.strip(),
            "description": desc_tag.text.strip(),
        })

    print(json.dumps(news_list, indent=4))
    return news_list
async def main() -> None:
    """Async entry point; delegates to the synchronous scraper.

    NOTE(review): ``t_main`` uses blocking ``requests`` I/O, which stalls
    the event loop — acceptable for a single-task script, but it should be
    moved to a thread (``asyncio.to_thread``) if other tasks are added.
    """
    # Dead commented-out code (an embedded copy of the scraper source)
    # removed — version control preserves history.
    t_main()
if __name__ == "__main__":
    # Script entry point: drive the async main() on a fresh event loop.
    asyncio.run(main())
|