将以下代码:

```python
while True:
    # 构造新URL
    new_url = url.format(chanid=chanid, page=page)
    # 发送请求并获取网页内容
    response = requests.get(new_url)
    html = response.text
    # 使用XPath提取data-chanid的值
    tree = etree.HTML(html)
    data_chanid = tree.xpath('//a[@class="act"]/@data-chanid')
    # 如果没有获取到data-chanid的值,说明已经到达最后一页,退出循环
    if not data_chanid:
        break
    # 将data-chanid的值赋给chanid变量
    chanid = data_chanid[0]
    # 处理网页内容...
    # 增加翻页计数
    page += 1
```

添加到下面的方法中:

```python
def Gethtml(self,i):
    self.url = f'https://www.qidian.com/rank/hotsales/chn&{data-chanid}/page{page}/'
    response = self.Uresponse().content.decode()
    tree = etree.HTML(response)
    li_list = tree.xpath('//*[@id="book-img-text"]/ul/li')
    for li in li_list:
        url_list = 'https:' + str(li.xpath('./div[3]/p/a[1]/@href')[0])
        self.Gettypehtml(url_list)
```
时间: 2024-04-17 16:23:50 浏览: 99
你可以将之前提供的代码块添加到`Gethtml`方法中,如下所示:
```python
import requests
from lxml import etree
class YourClassName:
    """Example crawler that pages through qidian.com hot-sales ranking lists.

    The class relies on the module-level ``requests`` and ``etree`` imports.
    ``Gethtml`` calls ``self.Gettypehtml(url)`` for every book link it finds;
    that method is not defined here and is expected to be supplied by the
    class this example is merged into.
    """

    def __init__(self):
        # URL of the page currently being fetched; set by Gethtml before
        # every call to Uresponse.
        self.url = None
        # 1-based page counter used when building the ranking URL.
        self.page = 1

    def Gethtml(self, i):
        """Walk the ranking pages and hand each book URL to ``Gettypehtml``.

        Starting from ``self.page`` and channel id ``"-1"``, each iteration
        builds the page URL, stores it in ``self.url``, downloads it once via
        ``Uresponse`` and extracts the book links. The loop stops when the
        page has no ``<a class="act">`` element carrying ``data-chanid`` or
        when it lists no books.

        Args:
            i: Unused; kept so existing callers keep working.
        """
        url = 'https://www.qidian.com/rank/hotsales/chn{chanid}/page{page}/'
        chanid = "-1"
        while True:
            # Uresponse reads self.url, so it must be set before the request.
            # The original left it as None, which made the request fail.
            self.url = url.format(chanid=chanid, page=self.page)
            # Fetch the page once and reuse the parsed tree for both the
            # pagination check and the book list.
            html = self.Uresponse().content.decode()
            tree = etree.HTML(html)

            data_chanid = tree.xpath('//a[@class="act"]/@data-chanid')
            if not data_chanid:
                # No active category link: treated as the end of the listing.
                break

            li_list = tree.xpath('//*[@id="book-img-text"]/ul/li')
            if not li_list:
                # A page without book entries also ends the crawl. Otherwise
                # the loop would never finish if the site keeps rendering the
                # active category link past the last page.
                break

            for li in li_list:
                hrefs = li.xpath('./div[3]/p/a[1]/@href')
                if not hrefs:
                    # Skip entries without the expected link instead of
                    # raising IndexError.
                    continue
                # The extracted href has 'https:' prepended to form the URL.
                self.Gettypehtml('https:' + str(hrefs[0]))

            # Carry the channel id reported by this page into the next URL.
            chanid = data_chanid[0]
            self.page += 1

    def Uresponse(self):
        """Send a GET request to ``self.url`` and return the response."""
        # A timeout keeps a stalled connection from hanging the crawl forever.
        response = requests.get(self.url, timeout=10)
        return response
```
在这个示例代码中,`Gethtml`方法中的循环部分已被替换为之前提供的代码块,并添加了对`Gettypehtml`方法的调用。通过在每次循环中更新`self.page`变量,我们可以实现翻页效果。
请注意,以上代码仅提供了一个示例,你可能需要根据实际情况进行适当的调整和扩展。如果有其他问题,请随时提问。
阅读全文