帮我将以下代码写注释# coding=gbk # -- coding:uft-8 -- # BOSS直聘 import requests from time import sleep import csv def collect(page): url = f'https://www.zhipin.com/wapi/zpgeek/search/joblist.json' headers = { 'accept': 'application/json, text/plain, /', 'cookie': ck, 'referer': 'https://www.zhipin.com/web/geek/job?query=%E8%AE%A1%E7%AE%97%E6%9C%BA%E8%BD%AF%E4%BB%B6&city=101190100', 'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36', 'x-requested-with': 'XMLHttpRequest' } params = { 'scene': '1', 'query': '计算机软件', 'city': '100010000', 'experience': '', 'degree': '', 'industry': '', 'scale': '', 'stage': '', 'position': '', 'salary': '', 'multiBusinessDistrict': '', 'page': page, 'pageSize': '30' } res = requests.get(url=url, headers=headers, params=params).json() for li in res['zpData']['jobList']: jobName = li['jobName'] brandName = li['brandName'] cityName = li['cityName'] areaDistrict = li['areaDistrict'] businessDistrict = li['businessDistrict'] salaryDesc = li['salaryDesc'] jobExperience = li['jobExperience'] jobDegree = li['jobDegree'] skills = '/'.join(li['skills']) brandIndustry = li['brandIndustry'] welfareList = '/'.join(li['welfareList']) row = [jobName, brandName, cityName, areaDistrict, businessDistrict, salaryDesc, jobExperience, jobDegree, skills, brandIndustry, welfareList] print(row) csv.writer(open('BOSS直聘.csv', 'a', encoding='utf-8-sig', newline='')).writerow(row) if name == 'main': ck = '' csv.writer(open('BOSS直聘.csv', 'w', encoding='utf-8-sig', newline='')).writerow( ['jobName', 'brandName', 'cityName', 'areaDistrict', 'businessDistrict', 'salaryDesc', 'jobExperience', 'jobDegree', 'skills', 'brandIndustry', 'welfareList']) for p in range(10): p += 1 while True: try: collect(p) break except: ck = input('请输入cookie: ') print(f'page {p} finish!') sleep(5)
时间: 2023-08-20 10:45:22 浏览: 197
# coding=gbk
# -- coding:utf-8 --
# BOSS直聘
import requests
from time import sleep
import csv
# 定义函数collect,用于收集每一页的数据
def collect(page):
    """Fetch one results page from the BOSS Zhipin job-search API and
    append every job on it as a row to 'BOSS直聘.csv'.

    page: 1-based page number forwarded to the API.

    Relies on the module-level ``ck`` variable for the session cookie.
    Raises on network errors, non-JSON responses, or when the payload
    lacks the expected keys (typically an expired cookie / anti-bot page);
    the caller uses that to prompt for a fresh cookie.
    """
    url = 'https://www.zhipin.com/wapi/zpgeek/search/joblist.json'
    headers = {
        # NOTE(review): the pasted source had 'application/json, text/plain, /'
        # — '/' is not a valid media type, presumably a markdown-mangled '*/*';
        # restored to the standard wildcard.
        'accept': 'application/json, text/plain, */*',
        'cookie': ck,  # session cookie supplied interactively at runtime
        'referer': 'https://www.zhipin.com/web/geek/job?query=%E8%AE%A1%E7%AE%97%E6%9C%BA%E8%BD%AF%E4%BB%B6&city=101190100',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36',
        'x-requested-with': 'XMLHttpRequest'
    }
    params = {
        'scene': '1',
        'query': '计算机软件',  # search keyword ("computer software")
        'city': '100010000',   # city code; empty filters below mean "any"
        'experience': '',
        'degree': '',
        'industry': '',
        'scale': '',
        'stage': '',
        'position': '',
        'salary': '',
        'multiBusinessDistrict': '',
        'page': page,
        'pageSize': '30'
    }
    res = requests.get(url=url, headers=headers, params=params).json()
    # Open the output file once per page — the original reopened it for
    # every row and never closed it, leaking a file handle per row.
    with open('BOSS直聘.csv', 'a', encoding='utf-8-sig', newline='') as f:
        writer = csv.writer(f)
        for li in res['zpData']['jobList']:
            row = [
                li['jobName'],
                li['brandName'],
                li['cityName'],
                li['areaDistrict'],
                li['businessDistrict'],
                li['salaryDesc'],
                li['jobExperience'],
                li['jobDegree'],
                '/'.join(li['skills']),        # list of skill tags -> "a/b/c"
                li['brandIndustry'],
                '/'.join(li['welfareList']),   # list of perks -> "a/b/c"
            ]
            print(row)  # progress feedback on stdout
            writer.writerow(row)
# Script entry point: write the CSV header, then scrape pages 1..10,
# prompting for a fresh cookie whenever a page fails.
if __name__ == '__main__':  # BUG FIX: original compared against 'main', which never matches, so nothing ran
    ck = ''  # session cookie; filled in by the user on the first failure
    # Truncate any previous output and write the header row; ``with``
    # guarantees the handle is flushed and closed (original leaked it).
    with open('BOSS直聘.csv', 'w', encoding='utf-8-sig', newline='') as f:
        csv.writer(f).writerow(
            ['jobName', 'brandName', 'cityName', 'areaDistrict',
             'businessDistrict', 'salaryDesc', 'jobExperience', 'jobDegree',
             'skills', 'brandIndustry', 'welfareList'])
    # Pages are 1-based; range(1, 11) replaces the original range(10) + p += 1.
    for p in range(1, 11):
        while True:
            try:
                collect(p)  # fetch and persist one page
                break
            except Exception:
                # A failure usually means the cookie expired or an anti-bot
                # challenge fired: ask for a new cookie and retry the page.
                # Narrowed from a bare ``except:`` so Ctrl-C still exits.
                ck = input('请输入cookie: ')
        print(f'page {p} finish!')
        sleep(5)  # throttle between pages to avoid rate limiting
阅读全文