Using Python, scrape 1,000 job listings from 58.com (58同城), including: ID, company name, education requirement, job type, job title, salary, posting date, closing date, city code, company size, benefits, job responsibilities, district, and work experience, and write them to a text file in table form
Sure. Based on your requirements, here is a Python implementation:
```python
import csv
import time

import requests
from bs4 import BeautifulSoup

# Browser-like headers reduce the chance of the request being blocked.
HEADERS = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"}


# Fetch one page of search results and return its HTML
def get_data(url):
    r = requests.get(url, headers=HEADERS, timeout=10)
    r.encoding = "utf-8"
    return r.text


# Return the stripped text of a node, or "" if the node is missing,
# so a single absent field does not crash the whole parse
def text_of(node):
    return node.get_text().strip() if node else ""


# Parse one results page into a list of rows.
# Note: the class names below reflect one snapshot of the 58.com listing
# markup; the site changes its HTML regularly, so verify the selectors
# against the live page before running.
def parse_data(html):
    soup = BeautifulSoup(html, "html.parser")
    job_list = soup.find_all("div", class_="job-item")
    data = []
    for job in job_list:
        title = job.find("div", class_="job-title")
        desc = job.find("div", class_="job-desc")
        area = job.find("div", class_="job-area")
        area_spans = area.find_all("span") if area else []
        desc_spans = desc.find_all("span") if desc else []

        job_id = job.get("data-jk", "")
        company = text_of(title.find("span", class_="job-name") if title else None)
        education = text_of(title.find("span", class_="job-desc") if title else None)
        job_type = text_of(title.find("span", class_="job-tag") if title else None)
        job_name = text_of(title.find("a") if title else None)
        salary = text_of(job.find("div", class_="job-salary"))
        publish_time = text_of(job.find("div", class_="job-pub-time"))
        end_time = text_of(job.find("div", class_="job-end-time"))
        city_code = text_of(area_spans[0] if area_spans else None)
        company_scale = text_of(desc_spans[0] if desc_spans else None)
        welfare = text_of(job.find("div", class_="job-welfare"))
        job_responsibility = text_of(job.find("div", class_="job-pub-desc"))
        region = text_of(area_spans[1] if len(area_spans) > 1 else None)
        experience = text_of(desc_spans[1] if len(desc_spans) > 1 else None)

        # Append the extracted fields as one row
        data.append([job_id, company, education, job_type, job_name, salary,
                     publish_time, end_time, city_code, company_scale,
                     welfare, job_responsibility, region, experience])
    return data


# Write all rows to a CSV file, with the column headers requested above
def write_to_csv(data):
    with open("job_list.csv", "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["编号", "公司名称", "学历要求", "工作类型", "工作名称", "薪资",
                         "发布时间", "截止时间", "城市编码", "公司规模", "福利",
                         "岗位职责", "地区", "工作经验"])
        writer.writerows(data)


if __name__ == "__main__":
    all_data = []
    page = 1
    # Keep paging until 1000 records are collected (capped at 50 pages)
    while len(all_data) < 1000 and page <= 50:
        url = f"https://cn.58.com/job/pn{page}/?key=%E5%89%8D%E7%AB%AF&cmcskey=%E5%89%8D%E7%AB%AF&final=1&jump=1"
        html = get_data(url)
        all_data.extend(parse_data(html))
        page += 1
        time.sleep(1)  # brief pause between requests to avoid being rate-limited
    write_to_csv(all_data[:1000])
```
The code uses the requests library to fetch pages from the site and BeautifulSoup to parse the HTML, collects the extracted fields into a list, and writes them to a CSV file with the csv library. The main loop pages through the search results until 1,000 records have been collected. You can change the URL and search parameters to match your own query. Note that 58.com updates its page markup and uses anti-scraping measures, so the CSS class names in parse_data should be checked against the live page before running.
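Since the original request asks for the output written "to a text file in table form", and CSV is only one such form, here is a minimal sketch of an alternative writer. It assumes the `all_data` list and the header row produced by the script above; the helper name `write_to_text` is hypothetical.

```python
# Minimal sketch: write rows to a plain-text file as an aligned table.
# Assumes `headers` (the column-name list) and `rows` (e.g. all_data) exist.
def write_to_text(headers, rows, path="job_list.txt"):
    table = [headers] + [[str(cell) for cell in row] for row in rows]
    # Pad each column to the width of its longest cell
    widths = [max(len(row[i]) for row in table) for i in range(len(headers))]
    with open(path, "w", encoding="utf-8") as f:
        for row in table:
            f.write("  ".join(cell.ljust(w) for cell, w in zip(row, widths)) + "\n")
```

One caveat: `str.ljust` pads by character count, so columns containing full-width CJK characters will not line up perfectly in a monospaced view; display-width-aware padding would need a library such as wcwidth.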