Python爬虫数据可视化:BOSS直聘
时间: 2023-12-02 14:03:54 浏览: 108
人工智能-项目实践-可视化-Boss直聘岗位数据爬虫分析可视化.zip
5星 · 资源好评率100%
以下是使用Python爬取BOSS直聘岗位数据并进行可视化的步骤:
1. 导入必要的库
```python
import re
from collections import Counter

import pymysql
import requests
from bs4 import BeautifulSoup
from flask import Flask, render_template
from pyecharts import options as opts
from pyecharts.charts import Bar, Pie, WordCloud
```
2. 爬取数据
```python
# Fetch one page from the BOSS Zhipin site and parse it.
def get_data(url, timeout=10):
    """Download ``url`` and return the response body parsed as HTML.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds ``requests`` waits for the server before giving up.
            Without a timeout a stalled connection would block the crawl
            indefinitely.

    Returns:
        A ``BeautifulSoup`` tree built from the response text with the
        built-in ``html.parser``.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Browser-like User-Agent sent with every request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    response = requests.get(url, headers=headers, timeout=timeout)
    # NOTE: the HTTP status code is not inspected here; an error page is
    # parsed like any other response.
    return BeautifulSoup(response.text, 'html.parser')
```
3. 爬取多页数据
```python
# Crawl several result pages.
def _parse_job_card(card):
    """Extract one job record from a ``job-primary`` element.

    Returns ``[job_name, salary, company, education, welfare_list]``, or
    ``None`` when a required sub-element is missing, so that one malformed
    card cannot abort the whole crawl.
    """
    title = card.find('div', class_='job-title')
    pay = card.find('span', class_='red')
    company_box = card.find('div', class_='company-text')
    company_link = company_box.find('a') if company_box is not None else None
    limit_box = card.find('div', class_='job-limit clearfix')
    limit_spans = limit_box.find_all('span') if limit_box is not None else []

    if title is None or pay is None or company_link is None:
        return None
    # The education requirement is read from the second <span>.
    if len(limit_spans) < 2:
        return None

    # A card without a benefits box is still recorded, with no benefit tags.
    welfare_box = card.find('div', class_='info-append')
    welfare_spans = welfare_box.find_all('span') if welfare_box is not None else []

    return [
        title.text.strip(),
        pay.text.strip(),
        company_link.text.strip(),
        limit_spans[1].text.strip(),
        [span.text.strip() for span in welfare_spans],
    ]


def get_all_data(pages=10):
    """Scrape job records from the first ``pages`` result pages.

    Args:
        pages: Number of result pages to request, starting at page 1.
            Defaults to 10, the page count that was previously hard-coded.

    Returns:
        A list of ``[job_name, salary, company, education, welfare_list]``
        rows, where ``welfare_list`` is itself a list of strings. Cards that
        lack a required element are skipped.
    """
    all_data = []
    for page in range(1, pages + 1):
        url = 'https://www.zhipin.com/c101280100-p100109/?page={}&ka=page-{}'.format(page, page)
        soup = get_data(url)
        for card in soup.find_all('div', class_='job-primary'):
            record = _parse_job_card(card)
            if record is not None:
                all_data.append(record)
    return all_data
```
4. 存储数据
```python
# Store the scraped rows in the MySQL database.
def save_data(data):
    """Insert scraped job rows into the ``job_info`` table.

    Args:
        data: Iterable of five-item rows
            ``[job_name, salary, company, education, welfare]``. Any item
            that is a list or tuple (the benefit tags) is joined with commas
            first, because a sequence bound to a single ``%s`` placeholder
            is not stored as one text value.

    Insert errors are printed and the transaction is rolled back rather
    than raised; the connection is closed in every case.
    """
    rows = [
        [
            ','.join(map(str, value)) if isinstance(value, (list, tuple)) else value
            for value in row
        ]
        for row in data
    ]
    db = pymysql.connect(host='localhost', user='root', password='123456', port=3306, db='boss_zhipin')
    try:
        cursor = db.cursor()
        sql = 'INSERT INTO job_info(job_name, salary, company, education, welfare) values(%s, %s, %s, %s, %s)'
        try:
            cursor.executemany(sql, rows)
            db.commit()
        except Exception as e:
            # Best-effort insert: report the problem and undo the partial batch.
            print(e)
            db.rollback()
    finally:
        # Runs even if rollback itself fails, so the connection never leaks.
        db.close()
```
5. 数据可视化
```python
# Data visualization: the Flask application and its landing page.
app = Flask(__name__)


@app.route('/')
def index():
    """Render the ``index.html`` template for the site root."""
    return render_template('index.html')
# Matches amounts such as "15K" or "15-25K" and captures the number(s).
_SALARY_PATTERN = re.compile(r'(\d+)(?:\s*-\s*(\d+))?\s*[kK]')


def _salary_in_k(text):
    """Return the salary in thousands parsed from ``text``, or ``None``.

    A range such as ``"15-25K"`` is reduced to its midpoint; a single value
    such as ``"15K"`` is returned as is. Text without a ``K`` amount (or a
    non-string value) yields ``None``.
    """
    if not isinstance(text, str):
        return None
    match = _SALARY_PATTERN.search(text)
    if match is None:
        return None
    low = int(match.group(1))
    high = int(match.group(2)) if match.group(2) else low
    return (low + high) / 2


@app.route('/salary')
def salary():
    """Return the pyecharts bar-chart options for the salary distribution.

    Salaries are read from ``job_info.salary`` and counted into 5k-wide
    buckets labelled ``0k-5k`` through ``30k-35k``. Each bucket covers
    ``[low, high)``, so a boundary value is counted exactly once. Rows that
    cannot be parsed, and salaries of 35k or more, are not counted.
    """
    db = pymysql.connect(host='localhost', user='root', password='123456', port=3306, db='boss_zhipin')
    try:
        cursor = db.cursor()
        cursor.execute('SELECT salary FROM job_info')
        results = cursor.fetchall()
    finally:
        db.close()

    step = 5
    salary_dict = {'{}k-{}k'.format(low, low + step): 0 for low in range(0, 31, step)}
    labels = list(salary_dict)
    for result in results:
        amount = _salary_in_k(result[0])
        if amount is None:
            continue
        # The bucket index is computed directly instead of parsing the
        # numbers back out of the label text.
        bucket = int(amount // step)
        if bucket < len(labels):
            salary_dict[labels[bucket]] += 1

    bar = Bar()
    bar.add_xaxis(labels)
    bar.add_yaxis('薪资分布', list(salary_dict.values()))
    bar.set_global_opts(title_opts=opts.TitleOpts(title='BOSS直聘薪资分布图'))
    return bar.dump_options_with_quotes()
@app.route('/education')
def education():
    """Return the pyecharts pie-chart options for education requirements.

    Counts how many rows of ``job_info`` carry each distinct ``education``
    value and feeds the (value, count) pairs to a ``Pie`` chart. The
    database connection is closed once the rows have been fetched.
    """
    db = pymysql.connect(host='localhost', user='root', password='123456', port=3306, db='boss_zhipin')
    try:
        cursor = db.cursor()
        cursor.execute('SELECT education FROM job_info')
        results = cursor.fetchall()
    finally:
        db.close()

    # Counter keeps first-seen order, so the slices appear in row order.
    education_dict = Counter(result[0] for result in results)

    pie = Pie()
    pie.add('', list(education_dict.items()))
    pie.set_global_opts(title_opts=opts.TitleOpts(title='BOSS直聘学历要求分布图'))
    return pie.dump_options_with_quotes()
@app.route('/welfare')
def welfare():
    """Return the pyecharts word-cloud options for job benefits.

    Reads ``job_info.welfare``, splits each value into individual benefit
    tags and counts how often each tag occurs. The database connection is
    closed once the rows have been fetched.
    """
    db = pymysql.connect(host='localhost', user='root', password='123456', port=3306, db='boss_zhipin')
    try:
        cursor = db.cursor()
        cursor.execute('SELECT welfare FROM job_info')
        results = cursor.fetchall()
    finally:
        db.close()

    welfare_dict = Counter()
    for result in results:
        text = result[0]
        if not isinstance(text, str):
            continue
        # Extending a list with a string would add single characters, so the
        # column is split into tags instead.
        # NOTE(review): assumes tags are stored comma-separated - confirm
        # against the code that writes this column.
        welfare_dict.update(tag.strip() for tag in text.split(',') if tag.strip())

    wordcloud = WordCloud()
    wordcloud.add('', list(welfare_dict.items()), word_size_range=[20, 100])
    wordcloud.set_global_opts(title_opts=opts.TitleOpts(title='BOSS直聘福利词云图'))
    return wordcloud.dump_options_with_quotes()
# Launch the Flask application only when this file is executed directly.
if __name__ == '__main__':
    app.run()
```
阅读全文