```
import scrapy
from dangdang.items import DangdangItem
from scrapy.http import Request
import csv

class DdSpider(scrapy.Spider):
    name = 'dd'
    allowed_domains = ['dangdang.com']
    start_urls = ['http://category.dangdang.com/pg1-cid4008154.html']

    def parse(self, response):
        item=DangdangItem()
        item["title"]=response.xpath("//a[@name='itemlist-picture']/@title").extract()
        item["link"]=response.xpath("//a[@name='itemlist-picture']/@href").extract()
        item["comment"]=response.xpath("//a[@name='itemlist-review']/text()").extract()
        yield item
        # Append the scraped data to a list
        data = []
        for i in range(len(item["title"])):
            row = []
            row.append(item["title"][i])
            row.append(item["link"][i])
            row.append(item["comment"][i])
            data.append(row)
        # Write to a CSV file
        with open('data.csv', 'a+', newline='') as csvfile:
            writer = csv.writer(csvfile)
            if csvfile.tell() == 0:
                writer.writerow(['titlle', 'link', 'comment'])  # write the header row
            for row in data:
                writer.writerow(row)
        # Paginate through to page 80
        for i in range(2,81):
            url='http://category.dangdang.com/pg'+str(i)+'-cid4008154.html'
            yield Request(url,callback=self.parse)
```
Running `scrapy crawl dd --nolog` in the terminal raises a syntax error.
Your code contains syntax errors, most likely from incorrect indentation or a missing symbol. Compare it against the corrected version below and check for any remaining typos or syntax problems:
```
import scrapy
from dangdang.items import DangdangItem
from scrapy.http import Request
import csv

class DdSpider(scrapy.Spider):
    name = 'dd'
    allowed_domains = ['dangdang.com']
    start_urls = ['http://category.dangdang.com/pg1-cid4008154.html']

    def parse(self, response):
        item = DangdangItem()
        item["title"] = response.xpath("//a[@name='itemlist-picture']/@title").extract()
        item["link"] = response.xpath("//a[@name='itemlist-picture']/@href").extract()
        item["comment"] = response.xpath("//a[@name='itemlist-review']/text()").extract()
        yield item
        # Append the scraped data to a list of rows
        data = []
        for i in range(len(item["title"])):
            row = []
            row.append(item["title"][i])
            row.append(item["link"][i])
            row.append(item["comment"][i])
            data.append(row)
        # Write the rows to a CSV file
        with open('data.csv', 'a+', newline='') as csvfile:
            writer = csv.writer(csvfile)
            if csvfile.tell() == 0:
                writer.writerow(['title', 'link', 'comment'])  # write the header row
            for row in data:
                writer.writerow(row)
        # Paginate from page 2 through page 80
        for i in range(2, 81):
            url = 'http://category.dangdang.com/pg' + str(i) + '-cid4008154.html'
            yield Request(url, callback=self.parse)
```
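As an aside: once the spider compiles, a cleaner structure would move the CSV writing out of `parse()` and into an item pipeline, which is where Scrapy expects persistence logic to live. Here is a minimal sketch, assuming the same parallel-list item layout as above; the class name `CsvWriterPipeline` is invented for illustration:
```
import csv

class CsvWriterPipeline:
    def open_spider(self, spider):
        # Open the file once per crawl instead of once per parsed page
        self.file = open('data.csv', 'a+', newline='', encoding='utf-8')
        self.writer = csv.writer(self.file)
        if self.file.tell() == 0:
            self.writer.writerow(['title', 'link', 'comment'])  # header only for an empty file

    def process_item(self, item, spider):
        # Each item carries three parallel lists; zip them into per-book rows
        for row in zip(item['title'], item['link'], item['comment']):
            self.writer.writerow(row)
        return item

    def close_spider(self, spider):
        self.file.close()
```
Enable it in `settings.py` with `ITEM_PIPELINES = {'dangdang.pipelines.CsvWriterPipeline': 300}` (the module path is an assumption based on the default project layout). Simpler still, Scrapy's built-in feed exports can write the CSV with no custom code at all: run `scrapy crawl dd -o data.csv`.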
If the error persists, please post the full error message so the problem can be diagnosed more precisely.
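Note that `--nolog` suppresses Scrapy's log output, which can hide the traceback that pinpoints the error. Running `scrapy crawl dd` without the flag will show it, or you can byte-compile the spider file directly; the path below assumes the default project layout:
```
python -m py_compile dangdang/spiders/dd.py
```
`py_compile` reports the exact file and line of a `SyntaxError`, which is usually enough to spot a bad indent or a missing colon.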