```python
import requests
from bs4 import BeautifulSoup
headers={'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.50'}
url='http://www.biquge5200.cc'
url1='http://www.biquge5200.cc/0_111/'
resoponse=requests.get(url1,headers=headers,timeout=50)
html=BeautifulSoup(resoponse.text,"html.parser")
href=html.find('div',{'id':'list'})
href_list=[]
for i in href.find_all("dd"):
    a=i.find('a')
    href=a['href']
    urls=url+href
    href_list.append(urls)
for h in href_list:
    try:
        responses=requests.get(h,headers=headers,timeout=50)
        htmls=BeautifulSoup(responses.text,'html.parser')
        title=htmls.find('div',{'class':'bookname'}).h1.text
        content=htmls.find('div',{'id':'content'}).text
        with open(f'D:\python案例\高武:神话最强传说\{title}.txt','w',encoding='utf-8')as f:
            f.write(content)
        print(title,'下载完成')
    except:
        continue
```
添加多个线程
时间: 2023-11-23 13:07:40 浏览: 271
你可以使用 Python 的 `threading` 模块来实现多线程下载。以下是修改后的代码:
```python
import os
import re
import threading
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
# Browser-like User-Agent sent with every request (index page and chapters).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.50'
}
# Site root; kept as a module-level name for any code that still refers to it.
url = 'http://www.biquge5200.cc'
# Index page whose <div id="list"> holds one link per chapter.
url1 = 'http://www.biquge5200.cc/0_111/'

response = requests.get(url1, headers=headers, timeout=50)
# Fail fast on an HTTP error instead of parsing an error page.
response.raise_for_status()
html = BeautifulSoup(response.text, "html.parser")

# Chapter links are the <a> tags inside the <dd> items of <div id="list">.
chapter_list = html.find('div', {'id': 'list'})
if chapter_list is None:
    raise SystemExit(f'在 {url1} 中未找到章节列表 (div#list)')

href_list = []
for dd in chapter_list.find_all("dd"):
    link = dd.find('a')
    # Skip <dd> entries that carry no usable link.
    if link is None or not link.get('href'):
        continue
    # Resolve against the index page: root-relative hrefs give the same
    # result as the old `url + href`, while absolute or page-relative hrefs
    # (which plain concatenation would corrupt) are handled correctly too.
    href_list.append(urljoin(url1, link['href']))
def _safe_filename(name):
    """Return *name* with characters Windows forbids in file names replaced."""
    return re.sub(r'[\\/:*?"<>|\r\n\t]', '_', name).strip()


def download_book(url, output_dir=r'D:\python案例\高武:神话最强传说'):
    """Download one chapter page and save its text as ``<title>.txt``.

    The title is read from ``<div class="bookname"><h1>`` and the body text
    from ``<div id="content">``.  Failures are reported on stdout instead of
    being silently discarded, so a missing chapter is visible to the user.

    Args:
        url: Address of the chapter page to fetch.
        output_dir: Directory the text file is written to; created if it
            does not exist.  The default is the path the script always used.
            NOTE(review): the default contains ':' inside a directory name,
            which Windows normally rejects -- confirm the intended name.

    Returns:
        None.  The function never raises for network, parsing or file
        errors, so one bad chapter cannot kill its worker thread loudly.
    """
    try:
        response = requests.get(url, headers=headers, timeout=50)
        # Do not save an HTTP error page as if it were a chapter.
        response.raise_for_status()
        page = BeautifulSoup(response.text, 'html.parser')
        # A missing element makes find() return None, so the attribute
        # access below raises AttributeError, which is handled further down.
        title = page.find('div', {'class': 'bookname'}).h1.text
        content = page.find('div', {'id': 'content'}).text

        # exist_ok=True keeps this safe when several threads get here at once.
        os.makedirs(output_dir, exist_ok=True)
        path = os.path.join(output_dir, f'{_safe_filename(title)}.txt')
        with open(path, 'w', encoding='utf-8') as f:
            f.write(content)
    except (requests.RequestException, AttributeError, OSError) as exc:
        print(url, '下载失败:', exc)
        return
    print(title, '下载完成')
# Upper bound on download threads alive at the same time.  Starting one
# thread per chapter all at once opens as many simultaneous connections as
# there are chapters, which can exhaust local resources or get the client
# throttled by the server.
MAX_THREADS = 16

# Work through the chapter URLs in batches of at most MAX_THREADS threads.
for start in range(0, len(href_list), MAX_THREADS):
    threads = [
        threading.Thread(target=download_book, args=(h,))
        for h in href_list[start:start + MAX_THREADS]
    ]
    for t in threads:
        t.start()
    # Wait for the whole batch to finish before launching the next one.
    for t in threads:
        t.join()
```
这个版本的代码使用多线程来下载小说,每个线程都会下载一个章节。创建线程的时候,将 `download_book` 函数作为线程的目标函数,同时传入章节的 URL 作为参数。在主线程中启动所有的线程,等待所有线程执行完毕再结束程序。
阅读全文