# Append a single movie record (title, stars, release time, score) as one
# CSV row. newline="" lets the csv module control line endings itself.
with open('maoyan100-02.csv', mode='a', newline="", encoding='utf-8') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow([title, star, releasetime, score])
时间: 2023-12-08 16:06:16 浏览: 107
import csv
import pprint

import requests

url = 'http://piaofang.maoyan.com/dashboard-ajax/movie'
# Left empty in this snippet; presumably to be filled in with whatever query
# parameters, cookies and headers the endpoint requires -- confirm before use.
params = {}
cookies = {}
headers = {}

response = requests.get(url=url, params=params, headers=headers, cookies=cookies)
html_data = response.json()
pprint.pprint(html_data)

movieList = html_data['movieList']['list']
fieldnames = ['电影名', '票房', '票房占比', '排片占比', '上映周期']

# The context manager guarantees the file is closed even if a row fails to
# serialise part-way through.
with open('data.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_write = csv.DictWriter(f, fieldnames=fieldnames)
    # In append mode the stream starts at the end of the file, so position 0
    # means the file is new or empty; only then is a header row needed.
    if f.tell() == 0:
        csv_write.writeheader()
    for i in movieList:
        # Build a fresh record per movie so no value can leak between rows.
        dit = {
            '电影名': i['movieInfo']['movieName'],
            '票房': i['sumBoxDesc'],
            '票房占比': i['boxRate'],
            '排片占比': i['showCountRate'],
            '上映周期': i['movieInfo']['releaseInfo'],
        }
        pprint.pprint(dit)
        # Persist the record; without this only the header reaches the file.
        csv_write.writerow(dit)
import openpyxl
import requests
from bs4 import BeautifulSoup

# Request headers shared by every page fetch.
REQUEST_HEADERS = {'User-Agent': 'Mozilla/5.0'}
# Spreadsheet columns in output order; these are also the keys of each
# movie record returned by get_movie_details().
COLUMNS = ['电影名称', '上映日期', '影片类型', '导演', '演员', '猫眼口碑', '累计票房']


def _select_text(soup, selector):
    """Return the stripped text of the first node matching *selector*.

    Returns an empty string when nothing matches, so a page that lacks one
    field does not abort the whole scrape with an AttributeError.
    """
    node = soup.select_one(selector)
    return node.text.strip() if node is not None else ''


def get_movie_data(year):
    """Fetch the film listing for *year* and return details for each movie.

    Args:
        year: Year placed into the listing URL's ``year`` query parameter.

    Returns:
        A list of movie-detail dicts (see get_movie_details). Movies whose
        detail page could not be fetched are skipped. An empty list is
        returned when the listing request does not answer with HTTP 200.
    """
    url = f'https://maoyan.com/films?year={year}'
    response = requests.get(url, headers=REQUEST_HEADERS)
    if response.status_code != 200:
        print(f"Failed to fetch data for year {year}")
        return []

    soup = BeautifulSoup(response.content, 'html.parser')
    movie_data = []
    for movie in soup.select('.movie-item-title'):
        link = movie.a
        # A title node without a usable link cannot be followed; skip it
        # instead of raising on the missing tag/attribute.
        if link is None or not link.get('href'):
            continue
        details = get_movie_details('https://maoyan.com' + link['href'])
        # get_movie_details returns {} on failure; dropping it here keeps
        # blank rows out of the spreadsheet.
        if details:
            movie_data.append(details)
    return movie_data


def get_movie_details(url):
    """Fetch one movie page and extract its fields.

    Args:
        url: Absolute URL of the movie detail page.

    Returns:
        A dict keyed by the COLUMNS names. Fields whose selector matches
        nothing on the page are empty strings. An empty dict is returned
        when the request does not answer with HTTP 200.
    """
    response = requests.get(url, headers=REQUEST_HEADERS)
    if response.status_code != 200:
        print(f"Failed to fetch details for {url}")
        return {}

    soup = BeautifulSoup(response.content, 'html.parser')
    actors = [actor.text.strip() for actor in soup.select('.info-actor a')]
    return {
        '电影名称': _select_text(soup, 'h1.name'),
        '上映日期': _select_text(soup, '.info-release'),
        '影片类型': _select_text(soup, '.info-category'),
        '导演': _select_text(soup, '.info-director'),
        '演员': ', '.join(actors),
        '猫眼口碑': _select_text(soup, '.score-num'),
        '累计票房': _select_text(soup, '.info-num'),
    }


def save_to_excel(data, filename):
    """Write movie records to a new Excel workbook.

    Args:
        data: Iterable of dicts keyed by the COLUMNS names; missing keys
            are written as empty cells.
        filename: Path the workbook is saved to.
    """
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.append(COLUMNS)
    for movie in data:
        ws.append([movie.get(column, '') for column in COLUMNS])
    wb.save(filename)
    print(f"Data saved to {filename}")


if __name__ == '__main__':
    years = range(2017, 2021)
    all_movie_data = []
    for year in years:
        all_movie_data.extend(get_movie_data(year))
    save_to_excel(all_movie_data, 'maoyan_movies_2017_to_2020.xlsx')
- `get_movie_data(year)`函数:根据指定年份获取电影数据的函数。
- `get_movie_details(url)`函数:获取电影详情的函数。
- `save_to_excel(data, filename)`函数:将电影数据保存到Excel文件的函数。