爬虫数据可视化大作业
时间: 2023-12-17 09:28:09 浏览: 272
为了完成爬虫数据可视化大作业,我们需要先爬取数据并保存。可以使用 Python 的 requests 和 BeautifulSoup 库爬取网页,再用 xlwt 库将数据保存到 Excel 文件中。注意:示例中的网址(www.xxx.com)和各个 CSS 类名(如 job-item、job-title)均为占位符,需要替换为目标网站的实际地址和页面结构。具体代码如下:
```python
import requests
from bs4 import BeautifulSoup
import xlwt
# 爬取数据
def _field_text(item, css_class):
    """Return the stripped text of the first ``div`` with *css_class* in *item*.

    An empty string is returned when the element is absent, so that a single
    incomplete posting does not abort the whole crawl with ``AttributeError``.
    """
    node = item.find('div', class_=css_class)
    return node.text.strip() if node is not None else ''


def getData():
    """Scrape listing pages 0-9 and return one row per job posting.

    Each row is a list of eight strings in the column order that ``saveData``
    writes to the sheet: link, title, company, salary, location, experience,
    education, welfare. Fields that are missing from a posting are stored as
    empty strings.

    Returns:
        list[list[str]]: the scraped rows, in page order.

    Raises:
        requests.RequestException: if a page cannot be fetched (including
            when the request exceeds the timeout).
    """
    # Built once: the headers do not change between pages.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    # CSS classes of the text fields, in output-column order (after the link).
    field_classes = (
        'job-title',
        'company-name',
        'job-salary',
        'job-location',
        'job-exp',
        'job-edu',
        'job-welfare',
    )
    datalist = []
    for i in range(0, 10):
        url = 'https://www.xxx.com/page/' + str(i)
        # A timeout keeps one unresponsive page from hanging the crawl forever.
        html = requests.get(url, headers=headers, timeout=10).text
        soup = BeautifulSoup(html, 'html.parser')
        for item in soup.find_all('div', class_='job-item'):
            # The sheet's first column is the job link; without it every
            # later column index is shifted by one and the eighth is missing.
            # NOTE(review): assumes the posting's first <a href=...> is its
            # detail link -- confirm against the real page markup.
            anchor = item.find('a', href=True)
            row = [anchor['href'] if anchor is not None else '']
            row.extend(_field_text(item, css_class) for css_class in field_classes)
            datalist.append(row)
    return datalist
# 保存数据到Excel文件中
def saveData(datalist, savepath):
    """Write a header row plus every row of *datalist* to an Excel workbook.

    The workbook has a single sheet named ``python``. Row 0 holds the eight
    column titles; row ``n + 1`` holds the first eight items of
    ``datalist[n]``. The workbook is saved to *savepath* only after all rows
    have been written.

    Args:
        datalist: sequence of rows; each row is indexed at positions 0-7.
        savepath: destination path passed to ``Workbook.save``.
    """
    headers = ("工作链接", "工作名称", "公司", "薪资", "工作地区", "工作经验", "学历", "员工福利")
    workbook = xlwt.Workbook(encoding="utf-8", style_compression=0)
    worksheet = workbook.add_sheet('python', cell_overwrite_ok=True)

    # Header titles go in the first row.
    for col_idx, title in enumerate(headers):
        worksheet.write(0, col_idx, title)

    # Data rows start at sheet row 1, directly below the header.
    for row_idx, row in enumerate(datalist, start=1):
        for col_idx in range(len(headers)):
            worksheet.write(row_idx, col_idx, row[col_idx])

    workbook.save(savepath)
# 数据可视化
# 这里可以使用matplotlib、seaborn等库进行数据可视化,具体方法可以根据需求进行选择和实现。
if __name__ == '__main__':
    # Script entry point: scrape the listings, then write them to data.xls.
    datalist = getData()
    saveData(datalist=datalist, savepath='data.xls')
```
数据爬取并保存完成后,就可以使用 matplotlib、seaborn 等库对数据进行可视化,具体的图表类型可以根据需求选择。例如,下面使用 matplotlib 库绘制薪资分布的柱状图和学历分布的饼图,代码如下:
```python
import xlrd
import matplotlib.pyplot as plt
# 读取Excel文件中的数据
def readData(filename):
    """Load the data rows of the ``python`` sheet from an Excel workbook.

    The first sheet row (index 0) is skipped; every following row is returned
    as a list containing the value of each of the sheet's columns.

    Args:
        filename: path of the workbook, passed to ``xlrd.open_workbook``.

    Returns:
        list[list]: one list of cell values per sheet row after the first.
    """
    workbook = xlrd.open_workbook(filename)
    sheet = workbook.sheet_by_name('python')
    row_count, col_count = sheet.nrows, sheet.ncols
    return [
        [sheet.cell(row_idx, col_idx).value for col_idx in range(col_count)]
        for row_idx in range(1, row_count)
    ]
# 绘制柱状图
def drawBar(datalist):
    """Show a bar chart of how many rows share each value at index 3.

    Bars appear in order of first occurrence of each value; the chart is
    labelled as a salary distribution.

    Args:
        datalist: iterable of rows; ``row[3]`` is used as the category.
    """
    counts = {}
    for row in datalist:
        category = row[3]
        counts[category] = counts.get(category, 0) + 1

    # One bar slot per distinct category, reused for the bars and tick labels.
    positions = range(len(counts))
    plt.bar(positions, list(counts.values()), align='center')
    plt.xticks(positions, list(counts.keys()))
    plt.xlabel('Salary')
    plt.ylabel('Number of Jobs')
    plt.title('Bar Chart of Salary')
    plt.show()
# 绘制饼图
def drawPie(datalist):
    """Show a pie chart of how many rows share each value at index 6.

    Slices appear in order of first occurrence of each value and are
    annotated with their percentage; the chart is labelled as an education
    distribution.

    Args:
        datalist: iterable of rows; ``row[6]`` is used as the category.
    """
    counts = {}
    for row in datalist:
        category = row[6]
        counts[category] = counts.get(category, 0) + 1

    plt.pie(
        list(counts.values()),
        labels=list(counts.keys()),
        autopct='%1.1f%%',
        startangle=90,
    )
    # Equal axis scaling keeps the pie circular.
    plt.axis('equal')
    plt.title('Pie Chart of Education')
    plt.show()
if __name__ == '__main__':
    # Script entry point: load the saved rows once and render both charts.
    datalist = readData(filename='data.xls')
    drawBar(datalist=datalist)
    drawPie(datalist=datalist)
```
阅读全文
相关推荐















