python爬虫新浪微博并将其保存到excel文件中
时间: 2024-03-14 12:50:13 浏览: 342
【python爬虫】连续爬取一个或多个新浪微博用户的数据并将结果信息写入文件或数据库 介绍
好的,我可以为您提供相关的代码。请先确认您已经安装好了以下库:requests、beautifulsoup4、openpyxl。
接下来是代码实现:
```python
from urllib.parse import urlencode

import requests
from bs4 import BeautifulSoup
from openpyxl import Workbook
# 登录微博并获取Cookie
def get_cookie(username, password):
session = requests.session()
login_url = 'https://passport.weibo.cn/signin/login'
post_url = 'https://passport.weibo.cn/sso/login'
session.headers.update({
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'})
login_page = session.get(login_url)
soup = BeautifulSoup(login_page.text, 'html.parser')
vk = soup.find('input', {'name': 'vk'})['value']
data = {
'username': username,
'password': password,
'savestate': '1',
'r': '',
'ec': '0',
'pagerefer': '',
'entry': 'mweibo',
'wentry': '',
'loginfrom': '',
'client_id': '',
'code': '',
'qq': '',
'mainpageflag': '1',
'hff': '',
'hfp': '',
'vt': '',
'm': '',
'mainpagewei': '',
'authurl': '',
'u': '',
'vt3': '',
'prelt': '',
'returntype': '',
'domain': '',
'alt': '',
'psp': '',
'sr': '',
's': '',
'vm': '',
'nonce': '',
'su': '',
'service': '',
'servertime': '',
'noncestr': '',
'rsakv': '',
'sp': '',
'sr': '1920*1080',
'encoding': 'UTF-8',
'prelt': '194',
'url': 'https://passport.weibo.cn/signin/welcome?entry=mweibo&r=https%3A%2F%2Fm.weibo.cn%2F'
}
post_data = {
'username': username,
'password': password,
'savestate': '1',
'ec': '0',
'pagerefer': '',
'entry': 'mweibo',
'wentry': '',
'loginfrom': '',
'client_id': '',
'code': '',
'qq': '',
'mainpageflag': '1',
'hff': '',
'hfp': '',
'vt': '',
'm': '',
'mainpagewei': '',
'authurl': '',
'u': '',
'vt3': '',
'sid': '',
'display': '',
'withOfficalFlag': '0',
'response_type': 'code',
'appkey': '',
'state': '',
'redirect_uri': '',
'uid': '',
'scope': '',
'isLoginSina': '',
'from': '',
'client_id': '',
'code': '',
'refresh_token': '',
'checkToken': '',
'verifyToken': '',
'from': '',
'switchLogin': '0',
'action': 'submit',
'withOfficalFlag': '0',
'withOfficalAccount': '',
'lang': '',
'oauth2Callback': '',
'rand': '0',
'csrf_token': '',
'vk': vk,
'password_': ''
}
session.post(post_url, data=post_data)
return session.cookies.get_dict()
# 获取微博数据
def get_weibo_data(cookie, keyword):
session = requests.session()
session.cookies.update(cookie)
weibo_url = 'https://m.weibo.cn/api/container/getIndex?type=all&queryVal=' + keyword + '&featurecode=20000320&luicode=10000011&lfid=106003type%3D1&title=' + keyword
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
'Accept': 'application/json, text/plain, */*',
'Referer': weibo_url
}
response = session.get(weibo_url, headers=headers)
data = response.json()
cards = data['data']['cards']
weibo_data = []
for card in cards:
if card['card_type'] == 9:
mblog = card['mblog']
weibo_id = mblog['id']
text = mblog['text']
reposts_count = mblog['reposts_count']
comments_count = mblog['comments_count']
attitudes_count = mblog['attitudes_count']
created_at = mblog['created_at']
user = mblog['user']
screen_name = user['screen_name']
verified = user['verified']
followers_count = user['followers_count']
follow_count = user['follow_count']
weibo_data.append([weibo_id, text, reposts_count, comments_count, attitudes_count, created_at, screen_name, verified, followers_count, follow_count])
return weibo_data
# 将数据保存到Excel文件
def save_to_excel(weibo_data, keyword):
wb = Workbook()
sheet = wb.active
sheet.title = keyword
sheet.append(['微博ID', '微博内容', '转发数', '评论数', '点赞数', '发布时间', '用户昵称', '是否认证', '粉丝数', '关注数'])
for data in weibo_data:
sheet.append(data)
wb.save(keyword + '.xlsx')
if __name__ == '__main__':
username = 'your_username'
password = 'your_password'
keyword = 'your_keyword'
cookie = get_cookie(username, password)
weibo_data = get_weibo_data(cookie, keyword)
save_to_excel(weibo_data, keyword)
```
这个程序需要您输入您的微博账号和密码,以及您想要爬取的关键词。最后程序会在当前目录下生成一个以关键词命名的Excel文件,保存了爬取到的微博数据。
请注意:这个程序仅供学习和研究使用,请勿用于任何非法用途。
阅读全文