用Python写一个爬取QQ聊天记录的程序
时间: 2023-05-31 07:04:13 浏览: 323
由于QQ聊天记录是需要登录QQ账号才能查看的,因此需要使用QQ的API或者模拟登录的方式来实现爬取聊天记录的功能。以下是使用模拟登录的方式实现的Python代码:
```python
import requests
from bs4 import BeautifulSoup
# 登录QQ账号,并获取cookie
def login_qq(username, password, timeout=10):
    """Post the login form to the QQ ptlogin endpoint and return the session cookies.

    Args:
        username: Value sent as the ``u`` form field.
        password: Value sent as the ``p`` form field.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller forever.

    Returns:
        dict: The cookies held by the session after the POST, as name -> value.

    Raises:
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
    """
    login_url = 'https://xui.ptlogin2.qq.com/cgi-bin/xlogin'
    headers = {
        'Referer': 'https://xui.ptlogin2.qq.com/cgi-bin/xlogin?appid=715030901&daid=73&pt_no_auth=1&s_url=https%3A%2F%2Fid.qq.com%2Findex.html',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
    }
    # NOTE(review): the password is posted as plain form data and several
    # fields (login_sig, verifycode, ...) are empty or fixed values -- confirm
    # that the endpoint accepts the form in this shape.
    data = {
        'u': username,
        'p': password,
        'verifycode': '',
        'webqq_type': '40',
        'remember_uin': '1',
        'login2qq': '1',
        'aid': '715030901',
        'u1': 'https://id.qq.com/index.html',
        'ptredirect': '0',
        'h': '1',
        'ptlang': '2052',
        'daid': '73',
        'from_ui': '1',
        'pttype': '1',
        'dumy': '',
        'fp': 'loginerroralert',
        'action': '0-35-1495675005110',
        'mibao_css': 'm_webqq',
        't': '1',
        'g': '1',
        'js_type': '0',
        'js_ver': '10231',
        'login_sig': '',
        'pt_rsa': '0',
        'pt_qzone_sig': '0',
        'pt_vcode_v1': '0',
        'pt_verifysession_v1': '',
        'pt_randsalt': '2',
        'pt_jstoken': '2040674265',
    }
    # The context manager closes the session's pooled connections on exit;
    # the cookies are copied into a plain dict before that happens.
    with requests.Session() as session:
        session.post(login_url, headers=headers, data=data, timeout=timeout)
        return session.cookies.get_dict()
# 获取QQ聊天记录
def get_qq_chat_history(qq_number, cookie, timeout=10):
    """Request the chat list for a QQ number and print each entry as ``sender: content``.

    Args:
        qq_number: QQ number used in the Referer header, the ``uin`` cookie
            and the ``uin`` query parameter.
        cookie: Mapping of cookie names to values; its ``'skey'`` entry is
            sent in the Cookie header.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the caller forever.

    Returns:
        None. Results are written to standard output.

    Raises:
        KeyError: If ``cookie`` has no ``'skey'`` entry.
        requests.exceptions.RequestException: If the request cannot be
            completed (connection failure, timeout, ...).
    """
    chat_url = 'https://user.qzone.qq.com/proxy/domain/r.qzone.qq.com/cgi-bin/tfriend/qqchat_list_v2'
    headers = {
        'Referer': 'https://user.qzone.qq.com/{}/infocenter'.format(qq_number),
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
        'Cookie': 'uin=o{}; skey={}'.format(qq_number, cookie['skey']),
    }
    # NOTE(review): 't' and 'g_tk' are fixed values here -- confirm whether
    # the server expects them to be derived per request / per session.
    params = {
        'uin': qq_number,
        'start': 0,
        'num': 10,
        'cgi_host': 'http://m.qzone.qq.com/cgi-bin',
        'callback': 'handleTList',
        't': '0.4908397081636454',
        'g_tk': '5381',
    }
    # Close the session's pooled connections as soon as the body is read.
    with requests.Session() as session:
        response = session.get(chat_url, headers=headers, params=params, timeout=timeout)
        html = response.text

    soup = BeautifulSoup(html, 'html.parser')
    for chat in soup.find_all('li', class_='list_item'):
        sender = chat.find('a', class_='sender')
        content = chat.find('div', class_='content')
        # find() returns None when the tag is absent; skip incomplete items
        # instead of failing with AttributeError on `.text`.
        if sender is None or content is None:
            continue
        print('{}: {}'.format(sender.text.strip(), content.text.strip()))
# 测试代码
if __name__ == '__main__':
    # Placeholder values: substitute a real account before running.
    username = 'your_qq_username'  # account name used to log in
    password = 'your_qq_password'  # password for that account
    qq_number = '123456789'  # QQ number whose chat list is requested

    # Log in first; the resulting cookies are handed to the history request.
    cookie = login_qq(username, password)
    get_qq_chat_history(qq_number, cookie)
```
上述代码中,`login_qq`函数用于模拟登录QQ账号,并获取cookie信息;`get_qq_chat_history`函数用于获取QQ聊天记录。通过调用这两个函数,即可实现获取QQ聊天记录的功能。需要注意的是,QQ聊天记录的获取需要登录QQ账号,因此需要提供正确的QQ账号和密码。
阅读全文