Python 爬取教育行业信息
时间: 2023-12-23 14:27:51 浏览: 24
以下是使用Python爬取教育行业信息的示例代码:
```python
import requests
from bs4 import BeautifulSoup
# Crawl education-industry job postings from BOSS Zhipin.
def crawl_boss():
    """Fetch the BOSS Zhipin listing page and print every job posting on it.

    The URL below is a placeholder and must be replaced with the real
    education-industry listing page. For each ``div.job-list-item`` element
    the job name, salary, location, experience, education, and company
    details are printed to stdout. A field whose element is missing from the
    page is printed as an empty string instead of aborting the whole crawl.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """

    def text_of(node):
        """Return the node's text, or "" when the lookup found nothing."""
        return node.text if node is not None else ""

    def nth_text(nodes, index):
        """Return the text of nodes[index], or "" when the list is too short."""
        return nodes[index].text if index < len(nodes) else ""

    url = "https://www.zhipin.com/xxx"  # replace with the BOSS Zhipin education-industry page URL
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    # Without a timeout, requests may wait forever on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page as job data.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Extract the job information.
    for job in soup.find_all("div", class_="job-list-item"):
        job_name = text_of(job.find("span", class_="job-name"))
        salary = text_of(job.find("span", class_="red"))
        location = text_of(job.find("span", class_="job-area"))
        experience = text_of(job.find("span", class_="job-experience"))
        # NOTE(review): the class name "job-pub-time" suggests a publish time,
        # not an education requirement -- confirm against the real page markup.
        education = text_of(job.find("span", class_="job-pub-time"))

        # Look the company container up once and reuse it for every field.
        company = job.find("div", class_="company-text")
        company_link = company.find("a") if company is not None else None
        company_paragraphs = company.find_all("p") if company is not None else []
        company_name = text_of(company_link)
        company_type = nth_text(company_paragraphs, 0)
        company_status = nth_text(company_paragraphs, 1)
        company_size = nth_text(company_paragraphs, 2)

        # Print the job information.
        print("岗位名称:", job_name)
        print("薪资:", salary)
        print("地点:", location)
        print("工作年限:", experience)
        print("学历要求:", education)
        print("公司名称:", company_name)
        print("公司类型:", company_type)
        print("公司状态:", company_status)
        print("公司规模:", company_size)
        print("--")
# Crawl education-industry job postings from Lagou.
def crawl_lagou():
    """Fetch the Lagou listing page and print every job posting on it.

    The URL below is a placeholder and must be replaced with the real
    education-industry listing page. For each ``li.con_list_item`` element
    the job name, salary, location, experience, education, and company
    details are printed to stdout. A field whose element is missing from the
    page is printed as an empty string instead of aborting the whole crawl.

    Raises:
        requests.RequestException: if the request fails, times out, or the
            server answers with an HTTP error status.
    """

    def text_of(node):
        """Return the node's text, or "" when the lookup found nothing."""
        return node.text if node is not None else ""

    def nth_text(nodes, index):
        """Return the text of nodes[index], or "" when the list is too short."""
        return nodes[index].text if index < len(nodes) else ""

    url = "https://www.lagou.com/xxx"  # replace with the Lagou education-industry page URL
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
    }
    # Without a timeout, requests may wait forever on an unresponsive server.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of parsing an error page as job data.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Extract the job information.
    for job in soup.find_all("li", class_="con_list_item"):
        position = job.find("div", class_="position")
        title = position.find("h3") if position is not None else None
        job_name = text_of(title)
        salary = text_of(job.find("span", class_="money"))
        location = text_of(job.find("span", class_="add"))

        # Experience and education are the 2nd and 3rd <span> inside "p_bot".
        requirements = job.find("div", class_="p_bot")
        requirement_spans = requirements.find_all("span") if requirements is not None else []
        experience = nth_text(requirement_spans, 1)
        education = nth_text(requirement_spans, 2)

        # Look the company container up once and reuse it for every field.
        company = job.find("div", class_="company")
        company_link = company.find("a") if company is not None else None
        company_spans = company.find_all("span") if company is not None else []
        company_name = text_of(company_link)
        company_type = nth_text(company_spans, 1)
        company_status = nth_text(company_spans, 2)
        company_size = nth_text(company_spans, 3)

        # Print the job information.
        print("岗位名称:", job_name)
        print("薪资:", salary)
        print("地点:", location)
        print("工作年限:", experience)
        print("学历要求:", education)
        print("公司名称:", company_name)
        print("公司类型:", company_type)
        print("公司状态:", company_status)
        print("公司规模:", company_size)
        print("--")
# Run the crawlers in order: BOSS Zhipin first, then Lagou.
for _crawler in (crawl_boss, crawl_lagou):
    _crawler()
```
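请注意,上述代码中的链接(`https://www.zhipin.com/xxx` 和 `https://www.lagou.com/xxx`)只是占位符,需要替换为实际的教育行业页面链接。另外,运行前需要先安装requests和BeautifulSoup库(`pip install requests beautifulsoup4`)。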