分成两段逻辑:一段获取数据,一段将数据放入数据库。
时间: 2024-04-03 07:33:42 浏览: 100
数学建模学习资料 姜启源数学模型课件 M04 数学规划模型 共85页.pptx
好的,以下是修改后的代码。将获取数据的逻辑和将数据放入数据库的逻辑分开了,更加清晰易懂。
```
import requests
from bs4 import BeautifulSoup
from pprint import pprint
import pymysql
# Persist scraped comment records into the MySQL `comment` table.
def save_to_navicat(comments):
    """Insert comment dicts into the `pa.comment` MySQL table.

    Each row is committed individually; a failing row is rolled back and
    reported without aborting the remaining inserts.

    Args:
        comments: iterable of dicts with keys 'content', 'author',
            'likes', 'time', 'location' (as produced by get_comments).
    """
    conn = pymysql.connect(host='localhost',
                           user='root',
                           password='root',
                           db='pa',
                           charset='utf8')
    try:
        # Cursor as a context manager so it is closed even on error.
        with conn.cursor() as cursor:
            # Parameterized statement (hoisted out of the loop): the driver
            # escapes the values, so scraped text cannot inject SQL.
            sql = '''
            INSERT INTO comment (content, author, likes, time, location)
            VALUES (%s, %s, %s, %s, %s)
            '''
            for comment in comments:
                try:
                    cursor.execute(sql, (comment['content'],
                                         comment['author'],
                                         comment['likes'],
                                         comment['time'],
                                         comment['location']))
                    # Per-row commit, matching the original behavior:
                    # earlier rows survive a later failure.
                    conn.commit()
                except Exception as e:
                    # Undo the failed row only; keep processing the rest.
                    conn.rollback()
                    print('Insert error:', e)
    finally:
        # Previously the connection leaked if anything raised outside the
        # per-row handler; always release it.
        conn.close()
# Fetch one page of Douban short comments for subject 30391186.
def get_comments(page, timeout=10):
    """Scrape one page (20 items) of comments and return them as dicts.

    Args:
        page: zero-based page index; mapped to the `start` query offset.
        timeout: seconds to wait for the HTTP response. New parameter with
            a default, so existing callers are unaffected; without it a
            stalled server would hang this call forever.

    Returns:
        list of dicts with keys 'content', 'author', 'likes', 'time',
        'location'.

    Raises:
        requests.HTTPError: on a 4xx/5xx response, instead of silently
            parsing an error page into an empty result.
    """
    url = "https://movie.douban.com/subject/30391186/comments?start={}&limit=20&sort=new_score&status=P".format(page*20)
    # Browser-like UA: Douban blocks the default requests user agent.
    headers = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3" }
    response = requests.get(url, headers=headers, timeout=timeout)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")
    comments = []
    for comment in soup.select(".comment-item"):
        content = comment.select_one(".comment-content").get_text().strip()
        author = comment.select_one(".comment-info a").get_text().strip()
        likes = comment.select_one(".votes").get_text().strip()
        time = comment.select_one(".comment-time").get_text().strip()
        # The location node's last child is the bare text after any markup.
        location = comment.select_one(".comment-location").contents[-1].strip()
        comments.append({
            "content": content,
            "author": author,
            "likes": likes,
            "time": time,
            "location": location
        })
    return comments
# Scrape the first N comment pages and persist each batch to the database.
def main(pages=5):
    """Fetch `pages` pages of comments, save each batch, and echo it."""
    for page in range(pages):
        comments = get_comments(page)
        save_to_navicat(comments)
        print("第{}页的数据:".format(page+1))
        pprint(comments)


# Guard the entry point so importing this module does not trigger
# network scraping and database writes as a side effect.
if __name__ == "__main__":
    main()
```
这样修改后,代码中的逻辑更加清晰,易于理解。
阅读全文