利用Python多线程爬取网站:https://wall.alphacoders.com/by_sub_category.php?id=304473&name=%E9%9D%92%E6%98%A5%E7%8C%AA%E5%A4%B4%E5%B0%91%E5%B9%B4%E4%B8%8D%E4%BC%9A%E6%A2%A6%E5%88%B0%E5%85%94%E5%A5%B3%E9%83%8E%E5%AD%A6%E5%A7%90+%E5%A3%81%E7%BA%B8&lang=Chinese 的所有图片,给出完整代码
时间: 2024-04-01 20:32:26 浏览: 180
python 爬取网页
以下是一个使用 Python 多线程爬取 https://wall.alphacoders.com/by_sub_category.php?id=304473&name=%E9%9D%92%E6%98%A5%E7%8C%AA%E5%A4%B4%E5%B0%91%E5%B9%B4%E4%B8%8D%E4%BC%9A%E6%A2%A6%E5%88%B0%E5%85%94%E5%A5%B3%E9%83%8E%E5%AD%A6%E5%A7%90+%E5%A3%81%E7%BA%B8&lang=Chinese 的所有图片的完整代码:
```python
import os
import requests
import threading
from bs4 import BeautifulSoup
# Directory the downloaded images are written to
SAVE_PATH = './images'
# URL of the listing page to scrape
URL = 'https://wall.alphacoders.com/by_sub_category.php?id=304473&name=%E9%9D%92%E6%98%A5%E7%8C%AA%E5%A4%B4%E5%B0%91%E5%B9%B4%E4%B8%8D%E4%BC%9A%E6%A2%A6%E5%88%B0%E5%85%94%E5%A5%B3%E9%83%8E%E5%AD%A6%E5%A7%90+%E5%A3%81%E7%BA%B8&lang=Chinese'
# Fetch the HTML source of a page
def get_html(url, timeout=10):
    """Download ``url`` and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up.
            ``requests`` applies no timeout by default, so without one a
            stalled connection would block the script forever.

    Returns:
        The page body as text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or timeouts.
    """
    res = requests.get(url, timeout=timeout)
    # Fail loudly instead of handing an error page to the HTML parser.
    res.raise_for_status()
    res.encoding = 'utf-8'
    return res.text
# Collect the URL of every image
def get_image_urls(html):
    """Extract image URLs from the thumbnail tags found in ``html``.

    Every ``<img>`` tag with class ``thumbimg`` is inspected and the
    ``thumb-350-`` marker is removed from its ``src`` value.
    NOTE(review): presumably this turns a thumbnail URL into the full-size
    image URL -- confirm against the site's current markup.

    Args:
        html: HTML source of the listing page.

    Returns:
        A list of URLs in document order, without duplicates. Tags that
        carry no ``src`` attribute are skipped.
    """
    soup = BeautifulSoup(html, 'html.parser')
    image_urls = []
    for img_tag in soup.find_all('img', {'class': 'thumbimg'}):
        # .get() avoids a KeyError on tags that have no src attribute.
        src = img_tag.get('src')
        if not src:
            continue
        image_urls.append(src.replace('thumb-350-', ''))
    # Drop duplicates (order preserved) so that two download threads never
    # write to the same file at the same time.
    return list(dict.fromkeys(image_urls))
# Download one image
def download_image(image_url, timeout=10):
    """Stream ``image_url`` into a file under ``SAVE_PATH``.

    The file name is the last path segment of the URL. The download is
    best-effort: a non-200 answer or a network error is reported on stdout
    and the function returns without raising.

    Args:
        image_url: Address of the image to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None.
    """
    file_name = image_url.split('/')[-1]
    target = os.path.join(SAVE_PATH, file_name)
    try:
        # The context manager releases the connection of the streamed
        # response even when writing the file fails half-way.
        with requests.get(image_url, stream=True, timeout=timeout) as res:
            if res.status_code != 200:
                print('skipped {}: HTTP {}'.format(image_url, res.status_code))
                return
            with open(target, 'wb') as f:
                for chunk in res.iter_content(8192):
                    f.write(chunk)
    except requests.RequestException as exc:
        print('failed to download {}: {}'.format(image_url, exc))
# Download images with multiple threads
def multi_thread_download(image_urls, max_workers=8):
    """Download every URL in ``image_urls`` using a bounded thread pool.

    Each URL is passed to ``download_image``. The call returns once all
    downloads have finished.

    Args:
        image_urls: Iterable of image URLs.
        max_workers: Upper bound on concurrently running download threads.
            Starting one thread per URL would open an unbounded number of
            connections at once.

    Returns:
        None.
    """
    # Imported locally so this function does not rely on the file's
    # top-level import list.
    from concurrent.futures import ThreadPoolExecutor, as_completed

    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        pending = {pool.submit(download_image, url): url for url in image_urls}
        for future in as_completed(pending):
            # Surface unexpected worker errors instead of dropping them;
            # the remaining downloads keep running.
            error = future.exception()
            if error is not None:
                print('download of {} raised {!r}'.format(pending[future], error))
if __name__ == '__main__':
    # Script entry point: fetch the single page at URL, collect the image
    # URLs found on it, and download them into SAVE_PATH.
    html = get_html(URL)
    image_urls = get_image_urls(html)
    # exist_ok avoids the check-then-create race of a separate
    # os.path.exists() test.
    os.makedirs(SAVE_PATH, exist_ok=True)
    multi_thread_download(image_urls)
```
注意:运行此代码会下载大量的图片,需要一定的时间和存储空间,请根据自己的实际情况进行调整。另外,代码只会请求并解析 URL 对应的这一个页面,不会自动翻页;运行前需要先安装 requests 和 beautifulsoup4 这两个第三方库。
阅读全文