Python 爬取京东网页并将数据存入数据库,用 pyecharts 展示数据
时间: 2023-09-17 14:10:04 浏览: 132
好的,以下是爬取京东网页、将数据存入数据库,并用 pyecharts 展示数据的步骤:
1. 导入所需的库
```python
import requests
from pyquery import PyQuery as pq
import pymysql
from pyecharts import options as opts
from pyecharts.charts import Bar
```
2. 连接数据库并创建表格
```python
# Open the MySQL connection shared by the rest of the script.
# NOTE: host/user/password are tutorial placeholders; replace with real settings.
# The charset is pinned to utf8mb4 so the connection matches the table's
# charset and Chinese product titles are transferred in that encoding,
# instead of depending on the installed PyMySQL version's default.
db = pymysql.connect(
    host='localhost',
    user='root',
    password='password',
    database='test',
    port=3306,
    charset='utf8mb4',
)
cursor = db.cursor()
# Create the target table; IF NOT EXISTS makes re-running the script harmless.
sql = """
CREATE TABLE IF NOT EXISTS `jd_goods` (
`id` int(10) NOT NULL AUTO_INCREMENT,
`title` varchar(255) NOT NULL,
`price` float NOT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
"""
cursor.execute(sql)
db.commit()
```
3. 定义爬取函数并解析数据
```python
def get_jd_info(keyword):
    """Search JD.com for *keyword* and scrape the product list from the page.

    Args:
        keyword: Search term. It is passed through ``params`` so that
            requests URL-encodes it (spaces, ``&``, ``#`` and non-ASCII
            text no longer corrupt the query string).

    Returns:
        A list of dicts, one per ``.gl-item`` element found in the response,
        each with the keys ``'title'`` and ``'price'``. Both values are the
        text extracted from the page; ``'price'`` is not converted to a
        number here and may be an empty string when the element is missing.

    Raises:
        requests.RequestException: If the request times out, the connection
            fails, or the server answers with an HTTP error status.
    """
    url = 'https://search.jd.com/Search'
    params = {'keyword': keyword, 'enc': 'utf-8'}
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    # Without a timeout requests waits indefinitely on a stalled connection.
    r = requests.get(url, params=params, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page into an empty list.
    r.raise_for_status()
    doc = pq(r.text)
    return [
        {
            'title': item.find('.p-name em').text().replace('\n', ''),
            'price': item.find('.p-price i').text(),
        }
        for item in doc('.gl-item').items()
    ]
```
4. 将数据存入数据库
```python
# Scrape the search results for the keyword and insert them row by row.
keyword = '手机'
data = get_jd_info(keyword)
# Parameterized statement: the driver escapes the scraped values.
sql = "INSERT INTO jd_goods(title, price) VALUES(%s, %s)"
for item in data:
    # The scraped price is raw page text; it can be empty or otherwise
    # non-numeric, in which case the row is skipped rather than stored.
    try:
        price = float(item['price'])
    except ValueError:
        print('Skipping item with unparseable price: {!r}'.format(item))
        continue
    # Commit per row so that one failing insert only rolls back that row.
    # Only database errors are handled; anything else (e.g. Ctrl-C or a
    # programming error) is allowed to propagate instead of being swallowed.
    try:
        cursor.execute(sql, (item['title'], price))
        db.commit()
    except pymysql.Error as exc:
        db.rollback()
        print('Failed to insert {!r}: {}'.format(item['title'], exc))
```
5. 从数据库中获取数据并用pyecharts可视化
```python
# Read the stored goods back from the database.
# Columns are named explicitly so the positional indexes below do not depend
# on the table's column order (SELECT * would break if the schema changed).
sql = "SELECT title, price FROM jd_goods"
cursor.execute(sql)
results = cursor.fetchall()
# This is the last database access in the script: release the cursor and
# the connection instead of leaving them open until interpreter exit.
cursor.close()
db.close()
titles = [row[0] for row in results]
prices = [row[1] for row in results]
# Draw a bar chart with pyecharts: one bar per product title, height = price.
bar = (
    Bar()
    .add_xaxis(titles)
    .add_yaxis("价格", prices)
    .set_global_opts(title_opts=opts.TitleOpts(title="{}价格分布".format(keyword)))
)
# Writes a standalone HTML file named after the keyword.
bar.render("{}.html".format(keyword))
```
以上是一个简单的例子,你可以根据自己的需求对代码进行修改和优化。
阅读全文