import scrapy from yg.items import YgItem #引入定义好的item class YgspiderSpider(scrapy.Spider): name="ygspider" alllowed_domains=["sun0769.com"] start_urls = ["https://wz.sun0769.com/political/index/politicsNewest?id=1&page=1"] def parse(self, response): lilist =response.xpath('//ul[@class="title-state-ul"]/li') for li in lilist: item =YgItem() item['id']= li.xpath('./span[1]/text()').extract_first() item['status']= li.xpath('./span[2]/text()').extract_first() item['title']= li.xpath('./span[3]/a/text()').extract_first() infohref='https://wz.sun0769.com'+li.xpath('./span[3]/a/@href').extract_first() item['retime']= li.xpath('./span[4]/text()').extract_first() item['asktime']= li.xpath('./span[5]/text()').extract_first() #解析详情页 yield scrapy.Request(url=infohref,callback=self.parseinfo,meta={'item':item}) # print(item) #翻页 next_page='https://wz.sun0769.com'+response.xpath('//[@class="arrow-page prov_rota"]/@href').extract_first() if next_page: yield scrapy.Request(url=next_page,callback=self.parse) print(next_page) pass def parseinfo(self, response): item =response.meta['item'] pass上述代码报错,希望改正一下
时间: 2024-03-08 09:45:06 浏览: 158
第八章:scrapy框架_第八章:scrapy框架.zip_
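根据你提供的代码,报错最可能来自以下几处:一是翻页使用的 XPath `//[@class="arrow-page prov_rota"]/@href` 缺少节点测试(`//` 后面必须跟元素名或 `*`,例如 `//*[@class="arrow-page prov_rota"]/@href`),属于非法表达式;二是 `extract_first()` 在没有匹配结果时返回 `None`,直接与字符串拼接会抛出 `TypeError`;三是 `alllowed_domains` 拼写有误,应为 `allowed_domains`。此外,也可能是你的项目结构和文件命名有问题,建议你按照 Scrapy 官方文档的要求进行项目创建和文件命名,比如: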
1. 项目名应该是一个有效的 Python 标识符,建议使用小写字母和下划线,比如 `myproject`。
2. Spider 类的文件名建议与 Spider 类的 `name` 属性保持一致(这只是惯例,并非 Scrapy 的强制要求),比如 `ygspider.py`。
3. Items 类的文件名应该是 `items.py`。
具体的代码实现如下:
`myproject/spiders/ygspider.py`
```python
import scrapy
from myproject.items import YgItem
class YgSpider(scrapy.Spider):
    """Crawl the newest-inquiries listing on wz.sun0769.com.

    ``parse`` handles one listing page: it builds a ``YgItem`` per row,
    requests the row's detail page, and then follows the "next page" link.
    ``parseinfo`` receives the detail response and emits the item.
    """

    name = "ygspider"
    allowed_domains = ["sun0769.com"]
    start_urls = ["https://wz.sun0769.com/political/index/politicsNewest?id=1&page=1"]

    def parse(self, response):
        """Yield a detail-page request per listing row, then the next page.

        Args:
            response: The listing-page response.

        Yields:
            ``scrapy.Request`` objects for detail pages and for the next
            listing page; a bare ``YgItem`` for rows without a detail link.
        """
        for li in response.xpath('//ul[@class="title-state-ul"]/li'):
            item = YgItem()
            item['id'] = li.xpath('./span[1]/text()').extract_first()
            item['status'] = li.xpath('./span[2]/text()').extract_first()
            item['title'] = li.xpath('./span[3]/a/text()').extract_first()
            item['retime'] = li.xpath('./span[4]/text()').extract_first()
            item['asktime'] = li.xpath('./span[5]/text()').extract_first()

            # extract_first() returns None when nothing matches; joining it
            # to a string unguarded would raise TypeError.
            detail_href = li.xpath('./span[3]/a/@href').extract_first()
            if detail_href:
                yield scrapy.Request(
                    url=response.urljoin(detail_href),
                    callback=self.parseinfo,
                    meta={'item': item},
                )
            else:
                # No detail link: emit what the listing row provided.
                yield item

        # Pagination. "//" must be followed by a node test; "//[...]" is not
        # valid XPath, so match any element carrying the class instead.
        next_href = response.xpath(
            '//*[@class="arrow-page prov_rota"]/@href'
        ).extract_first()
        if next_href:
            yield scrapy.Request(url=response.urljoin(next_href), callback=self.parse)

    def parseinfo(self, response):
        """Emit the item that ``parse`` attached to the detail-page request.

        Args:
            response: The detail-page response; ``response.meta['item']``
                holds the partially filled ``YgItem``.

        Yields:
            The item taken from ``response.meta``.
        """
        item = response.meta['item']
        # NOTE(review): nothing is extracted from the detail page yet; add
        # the detail-page selectors here once the page markup is confirmed.
        yield item
```
`myproject/items.py`
```python
import scrapy
class YgItem(scrapy.Item):
    """Container for one scraped inquiry record.

    Each attribute is a ``scrapy.Field``; values are assigned by the spider
    using the attribute name as the key (e.g. ``item['title']``).
    """

    # NOTE: ``id`` shadows the builtin inside this class body only; the name
    # is kept because it is the item key used when populating the item.
    id = scrapy.Field()
    status = scrapy.Field()
    title = scrapy.Field()
    retime = scrapy.Field()
    asktime = scrapy.Field()
    content = scrapy.Field()
```
建议你按照上述方式进行代码重构,然后再运行一下看看是否能够正常抓取数据。
阅读全文