```python
import time
import random

import requests
import parsel

visited_urls = set()  # URLs that have already been visited
page = 1
while True:  # crawl page after page
    url = f'https://travel.qunar.com/travelbook/list.htm?page={page}&order=hot_heat'
    html_data = requests.get(url).text
    selector = parsel.Selector(html_data)
    lis = selector.css('.list_item h2 a::attr(href)').getall()
    # iterate over every travel-note link on the current page
    for li in lis:
        detail_url = 'https://travel.qunar.com' + li
        if detail_url in visited_urls:  # skip links that were already visited
            continue
        visited_urls.add(detail_url)  # remember this link
        detail_html = requests.get(detail_url).text
        time.sleep(random.randint(3, 5))
        detail_selector = parsel.Selector(detail_html)
        title = detail_selector.css('.b_crumb_cont *:nth-child(3)::text').get()
        comment = detail_selector.css('.title.white::text').get()
        view_count = detail_selector.css('.view_count::text').get()
        date = detail_selector.css('#js_mainleft > div.b_foreword > ul > li.f_item.when > p > span.data::text').get()
        days = detail_selector.css('#js_mainleft > div.b_foreword > ul > li.f_item.howlong > p > span.data::text').get()
        money = detail_selector.css('#js_mainleft > div.b_foreword > ul > li.f_item.howmuch > p > span.data::text').get()
        character = detail_selector.css('#js_mainleft > div.b_foreword > ul > li.f_item.who > p > span.data::text').get()
        play_list = detail_selector.css('#js_mainleft > div.b_foreword > ul > li.f_item.how > p > span.data > span::text').getall()
        for play in play_list:
            print(play)
        print(title, comment, view_count, date, days, money, character, play_list, detail_url)
        time.sleep(random.randint(3, 5))
    # stop when a page comes back empty or after 20 pages
    if len(lis) == 0 or page >= 20:
        break
    page += 1
```
How can this code save all of the scraped data to a CSV file?
You can use Python's built-in csv module to save the data to a CSV file. First, collect each scraped record into a list (called `all_data` below), then write that list out with `csv.DictWriter`:
```python
import csv

# Save the collected records to a CSV file
# (utf-8-sig adds a BOM so Excel displays Chinese text correctly)
with open('data.csv', mode='w', newline='', encoding='utf-8-sig') as csv_file:
    fieldnames = ['title', 'comment', 'view_count', 'date', 'days', 'money',
                  'character', 'play_list', 'detail_url']
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    writer.writeheader()
    for data in all_data:  # all_data: list of dicts, one per travel note
        writer.writerow(data)
```
Here, `fieldnames` defines the column headers of the CSV file, and `all_data` is a list of dictionaries, one per scraped travel note, whose keys match `fieldnames`. `writer.writeheader()` writes the header row, and each `writer.writerow(data)` call appends one record. When the script finishes, you will find `data.csv` in the current directory.
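Note that the original code only prints each record and never builds `all_data`, so the scraping loop needs a small change first. A minimal sketch, reusing the variable names from the question's code (joining `play_list` with `|` is just one way to fit a list into a single CSV cell):

```python
all_data = []  # add this before the while-loop

# add this inside the for-loop, right after play_list is extracted,
# replacing (or alongside) the print() calls:
all_data.append({
    'title': title,
    'comment': comment,
    'view_count': view_count,
    'date': date,
    'days': days,
    'money': money,
    'character': character,
    'play_list': '|'.join(play_list),  # flatten the list into one cell
    'detail_url': detail_url,
})
```

Writing the file once after the crawl keeps the logic simple. If the crawl might be interrupted, you could instead open the CSV and write the header before the `while` loop, then call `writer.writerow(...)` for each record as it is scraped, so partial results are preserved.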