| 123456789101112131415161718192021222324252627282930313233 |
- import asyncio
- from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig
- import requests
- from bs4 import BeautifulSoup
- import json
- from dotenv import load_dotenv
- from scrapegraphai.graphs import ScriptCreatorGraph
- from scrapegraphai.utils import prettify_exec_info
- load_dotenv()
def t_main(url: str = "https://perinim.github.io/projects") -> list:
    """Scrape news items from *url* and print them as a JSON array.

    Each ``div.news-item`` element is expected to contain an ``h2`` title
    and a ``p`` description. Items missing either tag are skipped instead
    of raising ``AttributeError`` on ``None``.

    Args:
        url: Page to scrape. Defaults to the original hard-coded URL, so
            existing callers are unaffected.

    Returns:
        The list of ``{"title": ..., "description": ...}`` dicts (also
        printed to stdout for backward compatibility).

    Raises:
        requests.RequestException: on network failure, timeout, or a
            non-2xx HTTP status.
    """
    # Timeout prevents hanging forever on a stalled connection;
    # raise_for_status surfaces HTTP errors instead of parsing an error page.
    response = requests.get(url, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    news_list = []
    for news in soup.find_all('div', class_='news-item'):
        title_tag = news.find('h2')
        desc_tag = news.find('p')
        if title_tag is None or desc_tag is None:
            # Malformed item — skip rather than crash on `.text` of None.
            continue
        news_list.append({
            "title": title_tag.text.strip(),
            "description": desc_tag.text.strip(),
        })

    print(json.dumps(news_list, indent=4))
    return news_list
async def main() -> None:
    """Async entry point; delegates to the synchronous scraper.

    NOTE(review): ``t_main`` uses blocking ``requests`` I/O, which stalls
    the event loop — acceptable for a single-task script, but it should be
    moved to a thread (``asyncio.to_thread``) if other tasks are added.
    """
    # Dead commented-out code (an embedded copy of the scraper source)
    # removed — version control preserves history.
    t_main()
if __name__ == "__main__":
    # Script entry point: drive the async main() on a fresh event loop.
    asyncio.run(main())
|