python爬取豆瓣电影top250并保存为xlsx_【python】Python爬豆瓣电影top250导出Excel
时间: 2023-12-31 17:04:54 浏览: 127
好的,以下是Python代码实现:
```python
import requests
from bs4 import BeautifulSoup
import openpyxl
# 发送请求并获取页面内容
def get_html(url):
    """Fetch a page and return its body decoded as UTF-8 text.

    Args:
        url: Address of the page to download.

    Returns:
        The response body as a string.

    Raises:
        requests.exceptions.RequestException: If the request times out,
            the connection fails, or the server answers with a 4xx/5xx
            status code.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.146 Safari/537.36'
    }
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail here on an HTTP error instead of handing an error page to the parser.
    response.raise_for_status()
    response.encoding = 'utf-8'
    return response.text
# 解析页面内容,并返回数据
def _nested_text(node, *steps):
    """Follow a chain of (tag, css_class) lookups and return the final text.

    Returns '' as soon as any element along the chain is missing.
    """
    for tag, css_class in steps:
        if node is None:
            return ''
        node = node.find(tag, class_=css_class)
    return node.get_text() if node is not None else ''


def parse_html(html):
    """Extract movie records from the HTML of a Top 250 listing page.

    Args:
        html: HTML text of the page.

    Returns:
        A list of ``(title, rating, quote)`` string tuples, one for each
        ``<li>`` found inside ``<ol class="grid_view">``. A field whose
        element is absent is returned as ``''``. The list is empty when the
        page contains no ``grid_view`` list.
    """
    soup = BeautifulSoup(html, 'html.parser')
    grid = soup.find('ol', class_='grid_view')
    if grid is None:
        # find() returns None when nothing matches; chaining .find_all() on
        # that result would raise AttributeError.
        return []

    data = []
    for movie in grid.find_all('li'):
        title = _nested_text(movie, ('div', 'hd'), ('span', 'title'))  # movie title
        star = _nested_text(movie, ('div', 'star'), ('span', 'rating_num'))  # rating
        quote = _nested_text(movie, ('span', 'inq'))  # one-line quote; not every entry has one
        data.append((title, star, quote))
    return data
# 保存数据到Excel文件
def save_to_xlsx(data, filename):
    """Write movie records to a new Excel workbook.

    Row 1 holds the three column captions; each record in ``data`` fills one
    following row, using its first three elements as the three columns.

    Args:
        data: Iterable of indexable records with at least three elements.
        filename: Path the workbook is saved to.
    """
    book = openpyxl.Workbook()
    ws = book.active
    ws.title = '豆瓣电影Top250'

    # Header row.
    captions = ('电影名称', '评分', '引言')
    for col, caption in enumerate(captions, start=1):
        ws.cell(row=1, column=col, value=caption)

    # Data rows start directly below the header.
    for row, record in enumerate(data, start=2):
        for col in range(1, 4):
            ws.cell(row=row, column=col, value=record[col - 1])

    book.save(filename)
if __name__ == '__main__':
    # The Top 250 listing is split into 10 pages of 25 movies, selected with
    # the ``start`` query parameter (0, 25, ..., 225). Fetching only the base
    # URL yields just the first 25 entries.
    url = 'https://movie.douban.com/top250'
    data = []
    for start in range(0, 250, 25):
        html = get_html(f'{url}?start={start}')
        data.extend(parse_html(html))
    save_to_xlsx(data, '豆瓣电影Top250.xlsx')
```
这段代码会爬取豆瓣电影Top250的数据,并将数据保存到Excel文件中。运行前请先安装依赖:`pip install requests beautifulsoup4 openpyxl`。然后将代码复制到Python文件中,在终端中运行即可。Excel文件会保存在当前目录下。
阅读全文