import csv

import requests
from bs4 import BeautifulSoup


def get_top250_movies():
    """Scrape the Douban Top 250 list and return one row per movie.

    Requests the listing ten times, stepping the ``start`` query parameter
    from 0 to 225 in increments of 25, and extracts the fields of every
    ``div.info`` element found on each page.

    Returns:
        A list of ``[title, director, actors, year, rating, comment_num]``
        lists; every value is a string.

    Raises:
        requests.RequestException: if a page cannot be fetched, times out,
            or is answered with an HTTP error status.
    """
    url = 'https://movie.douban.com/top250'
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
            'AppleWebKit/537.36 (KHTML, like Gecko) '
            'Chrome/58.0.3029.110 Safari/537.3'
        ),
    }
    movie_info_list = []
    for start in range(0, 250, 25):
        params = {'start': str(start)}
        # A timeout keeps a stalled connection from hanging the script.
        res = requests.get(url, headers=headers, params=params, timeout=10)
        # Fail loudly on an error response instead of silently parsing an
        # error page into zero rows and reporting success.
        res.raise_for_status()
        soup = BeautifulSoup(res.text, 'html.parser')
        for movie in soup.find_all('div', class_='info'):
            title = movie.find('span', class_='title').text
            info = movie.find('div', class_='bd').p.text.strip().split('\n')
            # NOTE(review): the fixed slice offsets below assume a specific
            # layout of the description paragraph -- verify them against the
            # live page markup before trusting these three columns.
            director = info[0][4:]
            actors = info[1][3:]
            year = info[1][-5:-1]
            rating = movie.find('span', class_='rating_num').text
            # The fourth <span> inside div.star carries the vote count; the
            # last three characters (the unit suffix) are dropped.
            comment_num = (
                movie.find('div', class_='star').find_all('span')[3].text[:-3]
            )
            movie_info_list.append(
                [title, director, actors, year, rating, comment_num]
            )
    return movie_info_list


def save_to_csv(movie_info_list):
    """Write the scraped rows to ``movie_info.csv`` in the working directory.

    The file is encoded as UTF-8 with a BOM (``utf-8-sig``) and starts with
    a header row.

    Args:
        movie_info_list: Iterable of rows as returned by
            ``get_top250_movies``.
    """
    with open('movie_info.csv', 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        writer.writerow(['电影名称', '导演', '演员', '上映年份', '评分', '评论数'])
        writer.writerows(movie_info_list)


if __name__ == '__main__':
    movie_info_list = get_top250_movies()
    save_to_csv(movie_info_list)
    print('电影信息保存成功!')

# Follow-up request: building on this code, generate a word cloud of the
# scraped movie genres, weighted by rating.
时间: 2023-06-26 21:08:59 浏览: 148
Porta_COM_VB6.zip_VB6 COM_vb6
可以使用Python中的wordcloud库来生成词云。首先需要安装该库,可以使用以下命令进行安装:
```
pip install wordcloud
```
然后在主程序中调用 get_top250_movies() 得到 movie_info_list 之后,添加以下代码:
```python
from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Collect every '/'-separated token from column 2 of the scraped rows.
# NOTE(review): index 2 is the field the scraper stores as `actors`
# (CSV header '演员'), not a dedicated genre field -- confirm it really
# holds the genre text before relying on this output.
genre_list = []
for movie_info in movie_info_list:
    genres = movie_info[2].split('/')
    genre_list.extend(genres)

# Build the word cloud from the space-joined tokens.
# NOTE(review): Chinese text presumably needs a CJK-capable font supplied
# through WordCloud's `font_path` argument -- verify on the target machine.
wordcloud = WordCloud(
    background_color='white', width=800, height=400
).generate(' '.join(genre_list))

# Display the image without axes.
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()
```
这段代码将所有电影类型存储在一个列表中,然后使用WordCloud生成词云图并显示。可以根据需要调整词云的参数,例如背景颜色、大小等。最终生成的词云图可以使用Matplotlib库进行显示。
阅读全文