import requests from bs4 import BeautifulSoup def get_html(url): headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'} try: response = requests.get(url, headers=headers) response.raise_for_status() response.encoding = response.apparent_encoding return response.text except: return "" def parse_html(html): soup = BeautifulSoup(html, 'html.parser') book_list = soup.find_all('li', class_='subject-item') for book in book_list: title = book.find('div', class_='info').find('a')['title'] author_info = book.find('div', class_='pub').get_text().split('/') author = author_info[0].strip() publisher = author_info[-3].strip() print('书名:', title) print('作者:', author) print('出版社:', publisher) print('--------------------------') if __name__ == '__main__': for i in range(0,1000,20): url = 'https://book.douban.com/tag/%E5%8E%86%E5%8F%B2?start='+str(i)+'&type=T' html = get_html(url) parse_html(html)错误修改
时间: 2024-04-26 21:26:56 浏览: 139
感谢提供代码。这段代码没有判断页面中是否还有书籍信息,因此翻过最后一页后仍会继续请求并解析空页面(空列表本身不会让 for 循环报错;真正可能抛出异常的是 `author_info[-3]`:当出版信息不足三段时会触发 IndexError)。同时,代码的缩进格式也需要调整,可以参考下面修改后的代码:
```python
import requests
from bs4 import BeautifulSoup
def get_html(url, timeout=10):
    """Fetch *url* and return the decoded response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` may block indefinitely on a stalled
            connection.

    Returns:
        The response text, or an empty string when the request fails
        (connection error, timeout, or an HTTP error status).
    """
    # A browser-like User-Agent is sent with every request.
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
    except requests.RequestException:
        # Best-effort fetch: network/HTTP failures yield "" so the caller can
        # decide what to do, while unrelated errors (and Ctrl-C) still surface.
        return ""
    # Let requests guess the charset from the body rather than trusting the
    # HTTP header.
    response.encoding = response.apparent_encoding
    return response.text
def parse_html(html):
    """Print title, author and publisher for every book on a listing page.

    Args:
        html: Page markup to parse; may be an empty string.

    Returns:
        The number of ``li.subject-item`` entries found in *html*. ``0``
        means the page held no books, which callers can use to stop
        paginating.
    """
    soup = BeautifulSoup(html, 'html.parser')
    book_list = soup.find_all('li', class_='subject-item')
    if not book_list:
        print('没有更多书籍了!')
        return 0
    for book in book_list:
        # Each lookup is guarded so one malformed entry cannot abort the page.
        info = book.find('div', class_='info')
        link = info.find('a') if info else None
        title = link.get('title', '') if link else ''

        pub = book.find('div', class_='pub')
        fields = [part.strip() for part in pub.get_text().split('/')] if pub else []
        author = fields[0] if fields else ''
        # The publisher is read as the third field from the end; entries with
        # fewer than three fields previously raised IndexError here.
        publisher = fields[-3] if len(fields) >= 3 else ''

        print('书名:', title)
        print('作者:', author)
        print('出版社:', publisher)
        print('--------------------------')
    return len(book_list)


if __name__ == '__main__':
    # `start` advances in steps of 20 (presumably the site's page size —
    # confirm against the live listing).
    for i in range(0, 1000, 20):
        url = 'https://book.douban.com/tag/%E5%8E%86%E5%8F%B2?start=' + str(i) + '&type=T'
        html = get_html(url)
        # Stop at the first page with no books. This also covers a failed
        # fetch (get_html returns ""), so the site is not hit with the
        # remaining requests once nothing useful comes back.
        if not parse_html(html):
            break
```
在这段代码中,我们加入了对书籍列表是否为空的判断:当页面中没有书籍时会给出提示并直接返回。同时,我们调整了代码的缩进格式,使其更加清晰易读。
阅读全文