self.dbpool=adbapi.ConnectionPool('MySQLdb',**dbargs)
def process_item(self, item, spider):
    """Scrapy pipeline hook: asynchronously insert *item* into MySQL.

    ``runInteraction`` schedules :meth:`insert_into_table` on a pool
    thread and returns a Deferred.  The original code discarded that
    Deferred, so any database error vanished silently; attach an
    errback so failed inserts are at least logged.
    """
    deferred = self.dbpool.runInteraction(self.insert_into_table, item)
    # Log (rather than drop) failures raised inside the transaction.
    deferred.addErrback(
        lambda failure: spider.logger.error('MySQL insert failed: %s', failure))
    # Return the item unchanged so later pipeline stages still receive it.
    return item
# 将爬取到的信息插入 MySQL 数据库 (insert the scraped data into the MySQL database)
def insert_into_table(self, conn, item):
    """Insert one scraped job posting into the ``collect2`` table.

    Runs inside a Twisted adbapi transaction: *conn* is a DB-API
    cursor supplied by ``ConnectionPool.runInteraction`` and *item*
    is the scraped item (mapping of field name -> value).

    The original hand-wrote 24 column names and 24 ``%s`` markers that
    had to be kept in sync by eye; here both the column list and the
    placeholders are generated from one authoritative tuple, so adding
    or removing a field is a one-line change.  Values are still passed
    as query parameters, so the driver handles all escaping.
    """
    columns = (
        'workcity', 'job_name', 'job_inwhichcompany', 'min_salary',
        'max_salary', 'job_category', 'workplace', 'zhaopin_numbers',
        'job_welfare', 'education_background', 'min_workexperience',
        'job_form', 'job_releasetime', 'company_name', 'company_form',
        'company_industry', 'company_scale', 'company_web',
        'company_address', 'data_addtime', 'data_sourceweb',
        'job_require', 'company_introduce', 'salary',
    )
    placeholders = ','.join(['%s'] * len(columns))
    sql = 'INSERT INTO collect2({}) VALUES({})'.format(
        ','.join(columns), placeholders)
    conn.execute(sql, tuple(item[col] for col in columns))
# spider/collect1.py 爬虫主程序 (main spider program)
# -*- coding: utf-8 -*-
import scrapy
from scrapy.loader import ItemLoader
from jobsdata_collect.items import Posts
import datetime
import string
import re
class Collect1Spider(scrapy.Spider):