python爬虫百度图片代码
时间: 2024-09-30 18:04:57 浏览: 54
Python爬取百度图片通常会用到`requests`库来发送HTTP请求获取网页内容,然后利用BeautifulSoup、lxml等库解析HTML,提取出图片链接。以下是一个简单的示例,展示如何使用`beautifulsoup4`库(配合`lxml`解析器)从搜索结果页抓取图片:
```python
import requests
from bs4 import BeautifulSoup
import os
def get_baidu_image_links(keyword, timeout=10):
    """Fetch the Baidu image search page for *keyword* and return link URLs.

    Args:
        keyword: Search term. It is passed through ``params`` so that
            characters such as ``&``, ``#`` or spaces are percent-encoded
            instead of corrupting the query string.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        A list with the ``href`` value of every ``<a class="i-bdimg">``
        element found in the returned HTML. The list is empty when no
        such element is present.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    url = 'http://image.baidu.com/search/index'
    params = {'tn': 'baiduimage', 'ie': 'utf-8', 'word': keyword}
    response = requests.get(url, params=params, headers=headers, timeout=timeout)
    # Fail loudly instead of parsing an error page as if it were results.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'lxml')
    # The original author expects image links inside elements with class
    # "i-bdimg". NOTE(review): the result page may be rendered by
    # JavaScript, in which case this selector matches nothing -- verify
    # against the live page.
    img_tags = soup.find_all('a', class_='i-bdimg')
    # Skip anchors without an href rather than raising KeyError.
    return [tag['href'] for tag in img_tags if tag.get('href')]
# Download and save images
def download_images(image_urls, save_dir='images', headers=None, limit=10, timeout=10):
    """Download up to *limit* images and save them as ``image_<n>.jpg``.

    Args:
        image_urls: Sequence of image URLs to fetch.
        save_dir: Directory to write into; created (including parents) if
            it does not exist.
        headers: Optional HTTP headers for the download requests. When
            omitted, a browser-like ``User-Agent`` is sent. The original
            code referenced an undefined ``headers`` name here, which
            raised ``NameError`` on the first download.
        limit: Maximum number of URLs to download, taken from the start of
            *image_urls*. ``None`` downloads all of them.
        timeout: Seconds to wait for each HTTP request.

    Returns:
        None. Files are written to *save_dir* as a side effect.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-2xx HTTP status for any download.
    """
    if headers is None:
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
        }
    # exist_ok avoids the check-then-create race of os.path.exists().
    os.makedirs(save_dir, exist_ok=True)
    for index, link in enumerate(image_urls[:limit], start=1):
        img_response = requests.get(link, headers=headers, timeout=timeout)
        # Do not save an HTML error page under a .jpg name.
        img_response.raise_for_status()
        with open(os.path.join(save_dir, f'image_{index}.jpg'), 'wb') as f:
            f.write(img_response.content)
# Usage example: search for a keyword, then download the links found.
keyword = '猫'
image_links = get_baidu_image_links(keyword=keyword)
download_images(image_urls=image_links)
```
阅读全文