增加代码对评论数量的爬取：
import requests
from bs4 import BeautifulSoup
from threading import Thread

def crawl_books(start, end):
    for i in range(start, end):
        url = 'http://search.dangdang.com/?key=%BF%C6%BB%C3%D0%A1%CB%B5&act=input&page_index=1&sort_type=sort_default#J_tab'.format(i)
        response = requests.get(url)
        page = response.text
        soup = BeautifulSoup(page, 'lxml')
        books = soup.find('ul', class_='bigimg')
        for book in books.find_all('li'):
            title = book.find('a', class_='pic').get('title')
            author = book.find('p', class_='search_book_author').text
            price = book.find('p', class_='price').find('span', class_='search_now_price').text
            print(title, author, price)

threads = []
for i in range(1, 101, 10):
    t = Thread(target=crawl_books, args=(i, i+10))
    threads.append(t)
    t.start()
for t in threads:
    t.join()
时间: 2024-02-29 11:55:06 浏览: 124
要让代码同时爬取评论数量，您需要先确定评论数量在页面中的位置和对应的元素，然后在遍历每本书的循环中找到该元素并提取其文本。以下是修改后的爬取代码示例：
import requests
from bs4 import BeautifulSoup
from threading import Thread
def crawl_books(start, end):
    """Print title, author, price and comment count for a range of result pages.

    Downloads the Dangdang search-result pages numbered ``start`` up to (but
    not including) ``end`` and prints one line per ``<li>`` found inside the
    ``ul.bigimg`` result list.

    Args:
        start: First page index to fetch (inclusive).
        end: Page index to stop at (exclusive).

    Returns:
        None. Results are written to standard output.
    """
    # The page number is substituted into ``page_index={}``. The previous URL
    # hard-coded ``page_index=1`` and had no placeholder, so ``.format(i)``
    # was a no-op and every iteration downloaded page 1 again.
    url_template = (
        'http://search.dangdang.com/?key=%BF%C6%BB%C3%D0%A1%CB%B5'
        '&act=input&page_index={}&sort_type=sort_default#J_tab'
    )

    def text_of(tag):
        # A missing element yields '' instead of raising AttributeError,
        # which would otherwise abort every remaining page of this call.
        return tag.text if tag is not None else ''

    for page_index in range(start, end):
        url = url_template.format(page_index)
        try:
            # Without a timeout a stalled connection would block forever.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as exc:
            print('Failed to fetch page {}: {}'.format(page_index, exc))
            continue

        soup = BeautifulSoup(response.text, 'lxml')
        books = soup.find('ul', class_='bigimg')
        if books is None:
            # find() returned None: this page has no ``ul.bigimg`` element.
            print('No book list found on page {}'.format(page_index))
            continue

        for book in books.find_all('li'):
            cover_link = book.find('a', class_='pic')
            title = cover_link.get('title') if cover_link is not None else ''
            author = text_of(book.find('p', class_='search_book_author'))

            price_line = book.find('p', class_='price')
            if price_line is not None:
                price = text_of(price_line.find('span', class_='search_now_price'))
            else:
                price = ''

            # The comment count is taken from the third link inside
            # ``p.search_star_line``.
            # NOTE(review): confirm this index against the live page markup.
            star_line = book.find('p', class_='search_star_line')
            star_links = star_line.find_all('a') if star_line is not None else []
            comment = star_links[2].text if len(star_links) > 2 else ''

            print(title, author, price, comment)
# Pages 1-100 are split into ten consecutive ranges of ten pages each;
# every range is handed to its own worker thread running crawl_books.
threads = [
    Thread(target=crawl_books, args=(first_page, first_page + 10))
    for first_page in range(1, 101, 10)
]
for worker in threads:
    worker.start()

# Block until every worker has finished its range.
for worker in threads:
    worker.join()
在这个示例中，我们使用 `book.find('p', class_='search_star_line').find_all('a')[2].text` 来定位评论数量元素并提取其文本，即取 `search_star_line` 段落中的第三个链接。如果页面结构不同（例如该段落中的链接少于三个），就取不到评论数量，因此您可能需要根据网站的实际结构调整选择器。
阅读全文