生成Python爬取需要登录的系统页面数据写入数据库的表中的代码
时间: 2023-03-13 10:16:58 浏览: 93
下面是一个示例代码:
import requests
from bs4 import BeautifulSoup
import pymysql# 登录
# Log in through a Session so that any cookies set by the login response are
# sent automatically with the requests that follow.
url = 'http://example.com/login'
data = {'username': 'user', 'password': '123456'}
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'}
# requests has no default timeout; without one an unresponsive server would
# block the script forever.
timeout = 10
s = requests.Session()
r = s.post(url, data=data, headers=headers, timeout=timeout)
# Abort on an HTTP error status instead of continuing unauthenticated.
# NOTE(review): a site may answer a failed login with 200 — confirm whether
# an additional check on the response body is needed for the real target.
r.raise_for_status()

# Fetch the page that holds the data, reusing the logged-in session.
url = 'http://example.com/'
r = s.get(url, headers=headers, timeout=timeout)
r.raise_for_status()
soup = BeautifulSoup(r.text, 'lxml')

# Parse the table rows: the first cell is stored as the title, the second
# cell as the url.
data_list = []
for tr in soup.find_all('tr'):
    tds = tr.find_all('td')
    # Rows without <td> cells are skipped as before; rows with a single cell
    # are skipped too, because indexing tds[1] on them raises IndexError.
    if len(tds) < 2:
        continue
    data_list.append({'title': tds[0].text, 'url': tds[1].text})

# Save the parsed rows to the database.
conn = pymysql.connect(host='127.0.0.1', user='root', password='123456', database='spider', charset='utf8')
try:
    # The cursor context manager closes the cursor even if an insert fails.
    with conn.cursor() as cursor:
        sql = 'insert into table_name(title, url) values(%s, %s)'
        # Parameterized placeholders keep scraped text out of the SQL string;
        # executemany sends all rows through a single call.
        cursor.executemany(sql, [(row['title'], row['url']) for row in data_list])
    # Commit only after every row was accepted.
    conn.commit()
finally:
    # Always release the connection; closing without a commit discards any
    # partially inserted rows.
    conn.close()
阅读全文