# --- Movies released per year ---
# Group the dataset by release year and count titles in each group.
# (`original_format` is the movie DataFrame defined earlier in the file.)
movies_year_count = original_format.groupby(['title_year'])['movie_title'].count()
movies_year_count.plot(figsize=(10, 5), marker='.')
plt.title('每年上映电影数量统计', fontsize=22)
plt.xlabel('年份', fontsize=15)
plt.ylabel('上映电影数量', fontsize=15)
plt.savefig('每年上映电影数量统计.png')
plt.show()

# --- Total gross per year ---
# Same grouping, but summing the `gross` column instead of counting titles.
movies_year_gross = original_format.groupby(['title_year'])['gross'].sum()
movies_year_gross.plot(figsize=(10, 5), marker='.')
plt.title('每年总票房统计', fontsize=22)
plt.xlabel('年份', fontsize=15)
plt.ylabel('总票房', fontsize=15)
plt.savefig('每年票房统计.jpg')
plt.show()
时间: 2023-06-16 12:04:03 浏览: 88
这段代码用于对电影数据集进行可视化分析,分别统计每年上映电影的数量和每年总票房。首先使用 `groupby` 方法按照年份进行分组,然后使用 `count` 和 `sum` 方法统计每组中电影数量和总票房。接着使用 `plot` 方法将结果进行绘图,并设置图像的标题、横纵坐标标签和保存图片。最后使用 `show` 方法显示图像。
相关问题
import requests
from bs4 import BeautifulSoup
import openpyxl


def get_movie_data(year):
    """Fetch the Maoyan film-list page for *year* and return a list of
    per-movie detail dicts (one per movie link found on the page).

    Returns an empty list and prints a message when the request fails.
    """
    url = f'https://maoyan.com/films?year={year}'
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        movies = soup.select('.movie-item-title')
        movie_data = []
        for movie in movies:
            # Each list item links to the movie's own detail page.
            movie_link = 'https://maoyan.com' + movie.a['href']
            movie_data.append(get_movie_details(movie_link))
        return movie_data
    else:
        print(f"Failed to fetch data for year {year}")
        return []


def get_movie_details(url):
    """Scrape one Maoyan movie detail page into a dict of Chinese-keyed
    fields (name, release date, genre, director, actors, score, box office).

    Returns an empty dict and prints a message when the request fails.
    """
    headers = {'User-Agent': 'Mozilla/5.0'}
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.content, 'html.parser')
        movie_name = soup.select_one('h1.name').text.strip()
        release_date = soup.select_one('.info-release').text.strip()
        genre = soup.select_one('.info-category').text.strip()
        director = soup.select_one('.info-director').text.strip()
        actors = [actor.text.strip() for actor in soup.select('.info-actor a')]
        maoyan_score = soup.select_one('.score-num').text.strip()
        box_office = soup.select_one('.info-num').text.strip()
        return {
            '电影名称': movie_name,
            '上映日期': release_date,
            '影片类型': genre,
            '导演': director,
            '演员': ', '.join(actors),
            '猫眼口碑': maoyan_score,
            '累计票房': box_office
        }
    else:
        print(f"Failed to fetch details for {url}")
        return {}


def save_to_excel(data, filename):
    """Write the scraped movie dicts to *filename* as an Excel sheet,
    one header row plus one row per movie (missing fields become '')."""
    wb = openpyxl.Workbook()
    ws = wb.active
    headers = ['电影名称', '上映日期', '影片类型', '导演', '演员', '猫眼口碑', '累计票房']
    ws.append(headers)
    for movie in data:
        row_data = [movie.get(header, '') for header in headers]
        ws.append(row_data)
    wb.save(filename)
    # BUG FIX: the original printed the literal text "(unknown)" instead of
    # interpolating the target filename into the message.
    print(f"Data saved to {filename}")


if __name__ == '__main__':
    years = range(2017, 2021)
    all_movie_data = []
    for year in years:
        movie_data = get_movie_data(year)
        all_movie_data.extend(movie_data)
    save_to_excel(all_movie_data, 'maoyan_movies_2017_to_2020.xlsx')
这段代码是一个Python脚本,用于从猫眼电影网站上爬取指定年份的电影数据,并保存到Excel文件中。
代码使用了requests库来发送HTTP请求,使用BeautifulSoup库来解析HTML内容,使用openpyxl库来操作Excel文件。
主要功能包括:
- `get_movie_data(year)`函数:根据指定年份获取电影数据的函数。
- `get_movie_details(url)`函数:获取电影详情的函数。
- `save_to_excel(data, filename)`函数:将电影数据保存到Excel文件的函数。
在主程序中,通过循环遍历2017年到2020年的年份,调用`get_movie_data(year)`函数获取电影数据,然后将所有电影数据保存到名为`maoyan_movies_2017_to_2020.xlsx`的Excel文件中。
注意:爬取网站数据时,请遵守网站的相关规定和法律法规,不要过度请求或滥用。
import requests
from bs4 import BeautifulSoup
import csv


def get_top250_movies():
    """Walk all 10 pages of Douban's Top 250 list and return one
    [title, director, actors, year, rating, comment_count] list per movie."""
    url = 'https://movie.douban.com/top250'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    movie_info_list = []
    for offset in range(0, 250, 25):
        # Pages are addressed by a `start` offset in steps of 25.
        res = requests.get(url, headers=headers, params={'start': str(offset)})
        soup = BeautifulSoup(res.text, 'html.parser')
        for item in soup.find_all('div', class_='info'):
            title = item.find('span', class_='title').text
            meta_lines = item.find('div', class_='bd').p.text.strip().split('\n')
            director = meta_lines[0][4:]
            actors = meta_lines[1][3:]
            year = meta_lines[1][-5:-1]
            rating = item.find('span', class_='rating_num').text
            comment_num = item.find('div', class_='star').find_all('span')[3].text[:-3]
            movie_info_list.append([title, director, actors, year, rating, comment_num])
    return movie_info_list


def save_to_csv(movie_info_list):
    """Dump the scraped rows to movie_info.csv with a Chinese header row."""
    with open('movie_info.csv', 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        writer.writerow(['电影名称', '导演', '演员', '上映年份', '评分', '评论数'])
        for movie_info in movie_info_list:
            writer.writerow(movie_info)


if __name__ == '__main__':
    save_to_csv(get_top250_movies())
    print('电影信息保存成功!')

# Page text that was fused onto the end of this code in the original post:
# 将此代码运行成功后保存的六个信息分别单独成一列
可以在save_to_csv函数中将每个信息分别写入csv文件中的一列,修改后代码如下:
```python
import requests
from bs4 import BeautifulSoup
import csv
def get_top250_movies():
    """Scrape Douban's Top 250 list (10 pages of 25) and return one
    [title, director, actors, year, rating, comment_count] list per movie."""
    url = 'https://movie.douban.com/top250'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    results = []
    for offset in range(0, 250, 25):
        # Pagination is driven by the `start` query parameter.
        page = requests.get(url, headers=headers, params={'start': str(offset)})
        parsed = BeautifulSoup(page.text, 'html.parser')
        for entry in parsed.find_all('div', class_='info'):
            title = entry.find('span', class_='title').text
            # The <p> under .bd holds director/actor and year/country lines.
            meta = entry.find('div', class_='bd').p.text.strip().split('\n')
            director = meta[0][4:]
            actors = meta[1][3:]
            year = meta[1][-5:-1]
            rating = entry.find('span', class_='rating_num').text
            # Fourth <span> under .star is the "NNN人评价" counter; drop the suffix.
            comment_num = entry.find('div', class_='star').find_all('span')[3].text[:-3]
            results.append([title, director, actors, year, rating, comment_num])
    return results
def save_to_csv(movie_info_list):
    """Write the scraped movies to movie_info.csv as a proper table.

    Each movie occupies one row, and each of its six fields (title,
    director, actors, year, rating, comment count) sits in its own column.

    BUG FIX: the original version re-read the freshly written file and
    wrote every cell as a separate single-cell row, collapsing the whole
    table into ONE column — the opposite of the stated goal of keeping
    the six fields in separate columns. A plain csv.writer already emits
    one column per field, so the destructive rewrite/rename step is removed.
    """
    with open('movie_info.csv', 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        # Header row; utf-8-sig BOM keeps Excel rendering Chinese correctly.
        writer.writerow(['电影名称', '导演', '演员', '上映年份', '评分', '评论数'])
        # One row per movie, six columns per row.
        writer.writerows(movie_info_list)
if __name__ == '__main__':
    # Scrape the full list, persist it, then report success.
    save_to_csv(get_top250_movies())
    print('电影信息保存成功!')
```
运行修改后的代码,会在同级目录下生成一个名为'movie_info.csv'的文件,其中每个信息分别单独成一列。
阅读全文