Building on this code, add the ability to crawl the number of reviews for each book:
```
import requests
from bs4 import BeautifulSoup
from threading import Thread

def crawl_books(start, end):
    for i in range(start, end):
        url = 'http://search.dangdang.com/?key=%BC%C6%CB%E3%BB%FA&act=input&page_index={}'.format(i)
        response = requests.get(url)
        page = response.text
        soup = BeautifulSoup(page, 'lxml')
        books = soup.find('ul', class_='bigimg')
        for book in books.find_all('li'):
            title = book.find('a', class_='pic').get('title')
            author = book.find('p', class_='search_book_author').text
            price = book.find('p', class_='price').find('span', class_='search_now_price').text
            print(title, author, price)

threads = []
for i in range(1, 101, 10):
    t = Thread(target=crawl_books, args=(i, i+10))
    threads.append(t)
    t.start()
for t in threads:
    t.join()
```
You can add a block that fetches the review count inside the loop that iterates over each book. The steps are as follows:
1. Inside the loop, find each book's detail-page link, for example:
```
book_link = book.find('a', class_='pic').get('href')
```
2. Request that link and extract the review count (a helper for parsing the raw text follows these steps), for example:
```
response = requests.get(book_link)
detail_soup = BeautifulSoup(response.text, 'lxml')
# the review link can be missing (the count may be rendered by JavaScript), so guard against None
comment_tag = detail_soup.find('a', class_='review_num')
comment_num = comment_tag.text if comment_tag else 'N/A'
```
3. Print the review count together with the other fields, for example:
```
print(title, author, price, comment_num)
```
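Depending on the page, the anchor text is usually not a bare number (e.g. something like '1234条评论'). The small helper below is a sketch assuming that kind of format; the name `parse_count` is mine, not from the original answer:
```
import re

def parse_count(text):
    # extract the first run of digits from strings like '1234条评论'
    match = re.search(r'\d+', text)
    return int(match.group()) if match else 0
```
You would then print `parse_count(comment_num)` instead of the raw text if you want an integer.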
The complete code looks like this:
```
import requests
from bs4 import BeautifulSoup
from threading import Thread

def crawl_books(start, end):
    for i in range(start, end):
        url = 'http://search.dangdang.com/?key=%BC%C6%CB%E3%BB%FA&act=input&page_index={}'.format(i)
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'lxml')
        books = soup.find('ul', class_='bigimg')
        for book in books.find_all('li'):
            title = book.find('a', class_='pic').get('title')
            author = book.find('p', class_='search_book_author').text
            price = book.find('p', class_='price').find('span', class_='search_now_price').text
            # follow the book's detail page to fetch its review count;
            # use a separate soup so the search-result soup is not clobbered
            book_link = book.find('a', class_='pic').get('href')
            response = requests.get(book_link)
            detail_soup = BeautifulSoup(response.text, 'lxml')
            comment_tag = detail_soup.find('a', class_='review_num')
            comment_num = comment_tag.text if comment_tag else 'N/A'
            print(title, author, price, comment_num)

threads = []
for i in range(1, 101, 10):
    t = Thread(target=crawl_books, args=(i, i+10))
    threads.append(t)
    t.start()
for t in threads:
    t.join()
```
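As a design note, the manual thread bookkeeping at the bottom can be replaced with `concurrent.futures` from the standard library. This is an equivalent sketch, not part of the original answer:
```
from concurrent.futures import ThreadPoolExecutor

# one task per 10-page slice, mirroring the manual Thread version above
with ThreadPoolExecutor(max_workers=10) as pool:
    for i in range(1, 101, 10):
        pool.submit(crawl_books, i, i + 10)
# leaving the with-block waits for all tasks, replacing the join() loop
```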