from bs4 import BeautifulSoup import requests import time import pymysql conn = pymysql.connect(host='localhost', user='root', password='123456', charset='utf8') cursor = conn.cursor() cursor.execute('create database wyya;') cursor.execute('use wyya;') create_Tb = 'create table sj(地址 varchar(100),标题 varchar(100),播放量 varchar(50),作者 varchar(50));' cursor.execute(create_Tb) headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36' } for i in range(0, 1505, 35): print(i) time.sleep(2) url = 'https://music.163.com/discover/playlist/?cat=华语&order=hot&limit=35&offset=' + str(i)#修改这里即可 response = requests.get(url=url, headers=headers) html = response.text soup = BeautifulSoup(html, 'html.parser') # 获取包含歌单详情页网址的标签 ids = soup.select('.dec a') # 获取包含歌单索引页信息的标签 lis = soup.select('#m-pl-container li') print(len(lis)) for j in range(len(lis)): # 获取歌单详情页地址 url = ids[j]['href'] # 获取歌单标题 title = ids[j]['title'] # 获取歌单播放量 play = lis[j].select('.nb')[0].get_text() # 获取歌单贡献者名字 user = lis[j].select('p')[1].select('a')[0].get_text() # 输出歌单索引页信息 print(url, title, play, user) insert_Tb = 'insert into sj(地址,标题,播放量,作者) values(%s,%s,%s,%s);' val = (url, title, play, user) cursor.execute(insert_Tb, val) cursor.execute("select *from sj;") conn.commit(); data = cursor.fetchall() for bases in data: print(bases) conn.close()写出优化后的这段代码,使爬取到的所有数据全部存入数据库
时间: 2024-04-28 21:25:56 浏览: 140
from bs4 import BeautifulSoup
import requests
import time
import pymysql
# --- configuration ----------------------------------------------------------
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
}
BASE_URL = 'https://music.163.com/discover/playlist/?cat=华语&order=hot&limit=35&offset='
PAGE_SIZE = 35   # playlists per index page (matches limit=35 in the URL)
MAX_OFFSET = 1505


def open_database():
    """Connect to MySQL and ensure the wyya database and sj table exist.

    Returns:
        (conn, cursor): an open pymysql connection and a cursor bound to wyya.
    """
    # utf8mb4 instead of utf8: MySQL's 'utf8' is 3-byte and can corrupt some
    # Chinese/emoji characters scraped from playlist titles.
    conn = pymysql.connect(host='localhost', user='root',
                           password='123456', charset='utf8mb4')
    cursor = conn.cursor()
    cursor.execute('create database if not exists wyya;')
    cursor.execute('use wyya;')
    cursor.execute(
        'create table if not exists sj('
        '地址 varchar(255),标题 varchar(255),播放量 varchar(50),作者 varchar(50));'
    )
    return conn, cursor


def parse_playlists(html):
    """Parse one playlist index page.

    Args:
        html: the raw HTML text of a discover/playlist page.
    Returns:
        A list of (url, title, play_count, author) tuples, one per playlist.
    """
    soup = BeautifulSoup(html, 'html.parser')
    links = soup.select('.dec a')            # anchors with href + title
    items = soup.select('#m-pl-container li')  # list items with play count/author
    rows = []
    # zip() instead of parallel indexing: if the two selects ever return
    # different lengths, the original ids[j] lookup raised IndexError and
    # aborted the whole crawl.
    for link, item in zip(links, items):
        try:
            rows.append((
                link['href'],
                link['title'],
                item.select('.nb')[0].get_text(),
                item.select('p')[1].select('a')[0].get_text(),
            ))
        except (KeyError, IndexError):
            # Malformed entry (missing attribute or sub-tag) — skip just it.
            continue
    return rows


def crawl():
    """Scrape every index page and persist each page's rows immediately.

    Commits after every page (instead of once at the very end) so a failure
    partway through the run never discards the rows already scraped.
    """
    conn, cursor = open_database()
    insert_sql = 'insert into sj(地址,标题,播放量,作者) values(%s,%s,%s,%s);'
    try:
        for offset in range(0, MAX_OFFSET, PAGE_SIZE):
            print(offset)
            time.sleep(2)  # be polite to the server between page fetches
            try:
                response = requests.get(BASE_URL + str(offset),
                                        headers=HEADERS, timeout=10)
                response.raise_for_status()
            except requests.RequestException as exc:
                # One bad page must not kill the whole crawl — log and move on.
                print('page offset %d failed: %s' % (offset, exc))
                continue
            rows = parse_playlists(response.text)
            print(len(rows))
            for row in rows:
                print(*row)
            if rows:
                # Batch insert + per-page commit: all scraped data is durable
                # even if a later iteration raises.
                cursor.executemany(insert_sql, rows)
                conn.commit()
        # Dump everything stored so far for verification.
        cursor.execute('select * from sj;')
        for record in cursor.fetchall():
            print(record)
    finally:
        # Always release the connection, even on an unexpected error.
        conn.close()


if __name__ == '__main__':
    crawl()
阅读全文