"""Asynchronously crawl a VOA listing page for .mp3 links and download them."""

import asyncio
import os
from urllib.parse import urljoin

import aiofiles
import aiohttp
from bs4 import BeautifulSoup


class VoaCrawler:
    """Scrape .mp3 links from a listing page and download them to disk."""

    def __init__(self, url, download_dir="."):
        # `download_dir` replaces the original hard-coded os.chdir() to a
        # user-specific desktop path, so the script runs on any machine.
        # Default "." is backward-compatible (files land in the CWD).
        self.url = url
        self.download_dir = download_dir
        # Always defined, so download_mp3() never hits AttributeError even if
        # called before fetch_mp3_urls().
        self.mp3_urls = []

    async def fetch_mp3_urls(self, page_url=None):
        """Populate self.mp3_urls with absolute .mp3 URLs found on page_url.

        page_url defaults to the URL given at construction time (the original
        script mistakenly called this with the integer 5).
        Returns the list of URLs for convenience.
        """
        page_url = page_url or self.url
        async with aiohttp.ClientSession() as session:
            async with session.get(page_url) as response:
                html = await response.text()
        soup = BeautifulSoup(html, "html.parser")
        hrefs = [a.get("href") for a in soup.find_all("a")]
        # Listing pages frequently use relative hrefs; resolve each against
        # the page URL so the download requests are valid.
        self.mp3_urls = self._absolute_mp3_links(hrefs, page_url)
        return self.mp3_urls

    @staticmethod
    def _absolute_mp3_links(hrefs, base_url):
        """Filter hrefs down to .mp3 links, resolved to absolute URLs."""
        return [urljoin(base_url, h) for h in hrefs if h and h.endswith(".mp3")]

    async def download_mp3(self):
        """Download every URL in self.mp3_urls into self.download_dir.

        BUG FIX: the original used `async for` over a plain list, which raises
        TypeError — a list is not an async iterable.
        """
        os.makedirs(self.download_dir, exist_ok=True)
        # One session reused for all downloads instead of one per file.
        async with aiohttp.ClientSession() as session:
            for mp3_url in self.mp3_urls:
                async with session.get(mp3_url) as resp:
                    if resp.status == 200:
                        filename = os.path.join(
                            self.download_dir, mp3_url.rsplit("/", 1)[-1]
                        )
                        async with aiofiles.open(filename, mode="wb") as f:
                            await f.write(await resp.read())


async def main():
    crawler = VoaCrawler("https://www.51voa.com/VOA_Standard_3.html")
    # BUG FIX: the original scheduled fetch_mp3_urls and download_mp3 as
    # *concurrent* tasks via asyncio.wait, so the download could run before
    # any URLs had been collected. The download depends on the fetch result,
    # so the two steps must be awaited sequentially.
    await crawler.fetch_mp3_urls()
    await crawler.download_mp3()


if __name__ == "__main__":
    # asyncio.run replaces the deprecated get_event_loop/run_until_complete
    # pattern; with a single top-level entry point the nest_asyncio and
    # gevent monkey-patch imports are unnecessary and have been dropped.
    asyncio.run(main())
时间: 2023-12-06 14:43:59 浏览: 302
asyncio.pdf
这段代码是一个使用异步编程方式爬取VOA网站的mp3文件链接并下载到本地的爬虫程序。它使用了Python中的asyncio模块和aiohttp库来实现异步HTTP请求和异步文件读写操作。具体来说，它定义了一个VoaCrawler类，其中包含fetch_mp3_urls和download_mp3两个异步方法，前者用于从VOA网站获取mp3文件链接，后者用于将这些链接对应的文件下载到本地。需要注意的是，主程序把这两个任务作为并发任务同时提交给事件循环，而download_mp3依赖fetch_mp3_urls的结果（self.mp3_urls），并发调度会导致下载任务在链接尚未抓取完成时就开始执行；此外fetch_mp3_urls被错误地传入了参数5而不是页面URL，download_mp3中对普通列表使用了async for也会抛出TypeError。正确的做法是先await抓取完成，再顺序执行下载。
阅读全文