Python 爬取豆瓣电影 Top250 电影名,并用可视化图表显示出来
时间: 2023-06-27 07:08:36 浏览: 129
好的,以下是实现代码:
```python
from collections import Counter

import matplotlib.pyplot as plt
import requests
from bs4 import BeautifulSoup
# Scrape the movie titles from Douban's Top 250 list.
def get_movies():
    """Return the titles listed on the Douban Top 250 pages, in page order.

    Ten pages are requested (``start`` = 0, 25, ..., 225). On each page every
    ``<div class="hd">`` contributes the text of the first ``<span>`` inside
    its first ``<a>``.

    Returns:
        list[str]: The collected title strings.

    Raises:
        requests.HTTPError: If any page responds with an error status.
        requests.RequestException: On connection failures or timeouts.
    """
    # NOTE(review): Douban is widely reported to reject the default
    # python-requests User-Agent (HTTP 418), so send a browser-like one.
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/114.0 Safari/537.36'
        ),
    }
    movies = []
    for start in range(0, 250, 25):
        url = 'https://movie.douban.com/top250?start={}&filter='.format(start)
        # A timeout keeps a stalled connection from hanging the script forever.
        response = requests.get(url, headers=headers, timeout=10)
        # Fail loudly instead of parsing an error page into an empty result.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')
        for item in soup.find_all('div', class_='hd'):
            movies.append(item.a.span.text)
    return movies
# Group movie titles by their first character and count each group.
def classify_movies(movies):
    """Count how many titles begin with each first character.

    Args:
        movies: Iterable of title strings.

    Returns:
        dict: Maps each first character to the number of titles starting
        with it, in order of first appearance. Empty titles are skipped.
    """
    # An empty string has no first character; skip it rather than letting
    # ``movie[0]`` raise IndexError and abort the whole tally.
    return dict(Counter(movie[0] for movie in movies if movie))
# Draw a bar chart of the per-character movie counts.
def show_chart(classified_movies):
    """Plot the counts in ``classified_movies`` as a bar chart and display it.

    Args:
        classified_movies: Mapping of first character -> number of movies.
    """
    labels = list(classified_movies)
    counts = [classified_movies[label] for label in labels]

    plt.bar(labels, counts)
    plt.xlabel('First Letter')
    plt.ylabel('Number of Movies')
    plt.title('Movies by First Letter')
    plt.show()
# Script entry point: scrape the titles, tally them, then plot the tally.
if __name__ == '__main__':
    show_chart(classify_movies(get_movies()))
```
运行后会弹出如下的可视化图表:
![image.png](attachment:image.png)
图表展示了豆瓣电影 Top250 中,按片名首字(即片名的第一个字符)分类后每一类的电影数量。
阅读全文