"""Crawl Dangdang search-result pages and print basic data for each book.

The original post asked for this script to be modified: it failed with
"'NoneType' object has no attribute 'text'" because BeautifulSoup's
``find()`` returns None when an element is missing. Every lookup below is
guarded so a missing element no longer aborts the worker thread.
"""
import requests
from bs4 import BeautifulSoup
from threading import Thread


def _text_or(tag, default=''):
    """Return ``tag.text``, or *default* when ``find()`` returned None."""
    return tag.text if tag is not None else default


def crawl_books(start, end):
    """Print title, author, price and review count for books on some pages.

    Args:
        start: First search-result page index to fetch (inclusive).
        end: Page index to stop at (exclusive).

    For every ``<li>`` in the ``ul.bigimg`` list of a result page, the
    book's detail page is fetched as well to read its review count. Items
    without a usable ``a.pic`` link are skipped; other missing fields fall
    back to ``''`` (author, price) or ``'0'`` (review count).
    """
    for i in range(start, end):
        url = 'http://search.dangdang.com/?key=%BC%C6%CB%E3%BB%FA&act=input&page_index={}'.format(i)
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'lxml')

        books = soup.find('ul', class_='bigimg')
        if books is None:
            # No result list on this page; nothing to iterate over.
            continue

        for book in books.find_all('li'):
            pic_link = book.find('a', class_='pic')
            if pic_link is None:
                continue
            href = pic_link.get('href')
            if not href:
                # Without a link the detail page cannot be requested.
                continue

            title = pic_link.get('title')
            author = _text_or(book.find('p', class_='search_book_author'))
            price_block = book.find('p', class_='price')
            price_tag = None
            if price_block is not None:
                price_tag = price_block.find('span', class_='search_now_price')
            price = _text_or(price_tag)

            book_link = 'https:' + href
            detail_response = requests.get(book_link)
            # Use a separate name so the listing page's soup is not shadowed.
            detail_soup = BeautifulSoup(detail_response.text, 'lxml')
            comment_num = _text_or(
                detail_soup.find('a', class_='review_num'), '0'
            )

            print(title, author, price, comment_num)


# Ten worker threads, each covering ten consecutive pages (1-10, ..., 91-100).
threads = []
for i in range(1, 101, 10):
    t = Thread(target=crawl_books, args=(i, i + 10))
    threads.append(t)
    t.start()

# Wait for every worker to finish before the script exits.
for t in threads:
    t.join()
时间: 2024-02-29 11:53:57 浏览: 79
import re import requests from bs4 import BeautifulSoup import t…
根据错误信息 `'NoneType' object has no attribute 'text'`,我们可以猜测问题出在找不到评论数量上。你可以添加一些条件来检查是否能够成功找到评论数量,例如:
```
# find() returns None when the page has no review-count link, so check
# before reading .text and fall back to '0'.
comment_num_tag = soup.find('a', class_='review_num')
if comment_num_tag is not None:
    comment_num = comment_num_tag.text
else:
    comment_num = '0'
```
完整代码如下:
```python
import requests
from bs4 import BeautifulSoup
from threading import Thread
def crawl_books(start, end):
for i in range(start, end):
url = 'http://search.dangdang.com/?key=%BC%C6%CB%E3%BB%FA&act=input&page_index={}'.format(i)
response = requests.get(url)
page = response.text
soup = BeautifulSoup(page, 'l
阅读全文