Python爬虫:酷狗音乐TOP20
时间: 2023-12-22 18:28:24 浏览: 107
以下是爬取酷狗音乐TOP20的Python代码:
```python
import requests
from bs4 import BeautifulSoup
import csv
# Request headers: a browser-style User-Agent string to send with HTTP requests.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3",
}
# Fetch the HTML page
def get_html(url, timeout=10):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely on a stalled server.

    Returns:
        The decoded page text, or an empty string if the request fails or
        the server answers with an HTTP error status.
    """
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()
        # Use the encoding detected from the body rather than the one
        # declared in the response headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: network/HTTP failures are reported to the caller
        # as an empty page; unrelated errors (e.g. KeyboardInterrupt) are no
        # longer swallowed.
        return ""
# Parse the HTML page
def parse_html(html):
    """Extract the chart rows from a ranking page.

    Looks up the ``pc_temp_num`` spans, ``pc_temp_songname`` anchors and
    ``pc_temp_time`` spans and walks them in lockstep; iteration stops at
    the shortest of the three lists.

    Args:
        html: Page markup as a string. An empty string yields nothing.

    Yields:
        One dict per song with the keys ``rank``, ``title``, ``author`` and
        ``time``, all stripped strings.
    """
    soup = BeautifulSoup(html, 'html.parser')
    ranks = soup.find_all('span', class_='pc_temp_num')
    titles = soup.find_all('a', class_='pc_temp_songname')
    times = soup.find_all('span', class_='pc_temp_time')
    for rank_tag, song_tag, time_tag in zip(ranks, titles, times):
        # Read the anchor text once. The previous code overwrote the tag
        # with the title string and then called .get_text() on that string.
        song_text = song_tag.get_text().strip()
        # The anchor text is treated as "<author> - <title>". Split on the
        # first hyphen only, so hyphens inside the title are preserved.
        author, sep, title = song_text.partition('-')
        if not sep:
            # No separator: keep the whole text as the title instead of
            # failing with an IndexError.
            author, title = '', song_text
        yield {
            'rank': rank_tag.get_text().strip(),
            'title': title.strip(),
            'author': author.strip(),
            'time': time_tag.get_text().strip(),
        }
# Save the data to a CSV file
def save_to_csv(data, filename='music.csv'):
    """Write song records to a CSV file, replacing any existing file.

    Args:
        data: Iterable of dicts with the keys ``rank``, ``title``,
            ``author`` and ``time``. It is consumed once, so a generator
            is fine.
        filename: Path of the CSV file to write. Defaults to ``music.csv``
            in the current working directory.

    Raises:
        KeyError: If a record lacks one of the four expected keys.
        OSError: If the file cannot be opened for writing.
    """
    # 'utf-8-sig' prefixes the file with a BOM (presumably so spreadsheet
    # applications recognise the encoding); newline='' lets the csv module
    # control line endings itself.
    with open(filename, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        writer.writerow(['排名', '歌曲名称', '歌手', '时长'])
        writer.writerows(
            [item['rank'], item['title'], item['author'], item['time']]
            for item in data
        )
# Entry point
def main():
    """Fetch the ranking page, parse its rows and write them to CSV."""
    page_url = 'https://www.kugou.com/yy/rank/home/1-8888.html?from=rank'
    # parse_html yields rows lazily; save_to_csv consumes them as it writes.
    save_to_csv(parse_html(get_html(page_url)))
# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
```
阅读全文