用Python编写爬取百度图片中猫和狗的图片各100张,在本地创建两个文件夹分别为cat和dog,将图片存入相应文件夹的四种代码
时间: 2024-06-10 10:05:46 浏览: 136
以下是四种代码实现爬取百度图片中猫和狗的图片各100张并保存到本地文件夹的方法:
方法一:
```python
import requests
import os

# Browser User-Agent so Baidu does not reject the request as a bot.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
# Search keywords (one destination folder per keyword).
keywords = ['猫', '狗']
# Number of images to download per keyword.
max_nums = 100
# Root directory for the downloaded images.
save_dir = os.getcwd()
# Create one folder per keyword; exist_ok avoids the exists()/mkdir() race.
for kw in keywords:
    os.makedirs(os.path.join(save_dir, kw), exist_ok=True)
# Crawl: Baidu's acjson endpoint pages results 30 at a time.
for kw in keywords:
    saved = 0  # images actually written for this keyword, capped at max_nums
    for offset in range(0, max_nums, 30):
        url = f'https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&queryWord={kw}&word={kw}&pn={offset}&rn=30'
        try:
            page = requests.get(url, headers=headers, timeout=10)
            entries = page.json().get('data', [])
        except (requests.RequestException, ValueError):
            continue  # skip a result page that fails to load or parse
        for image in entries:
            if saved >= max_nums:
                break  # the original downloaded up to 120 images (4 pages * 30)
            image_url = image.get('middleURL')
            if not image_url:
                continue  # the trailing element of 'data' is an empty dict
            try:
                # Separate variable for the image response: the original
                # reused `response`, clobbering the page being iterated.
                img = requests.get(image_url, headers=headers, timeout=10)
                img.raise_for_status()
            except requests.RequestException:
                continue
            # Indexed filename: the original built names from fromURL, which
            # can contain characters illegal in filenames and can collide.
            with open(os.path.join(save_dir, kw, f'{kw}_{saved}.jpg'), 'wb') as f:
                f.write(img.content)
            saved += 1
```
方法二:
```python
from urllib.parse import quote
from urllib.request import urlretrieve
from urllib.request import urlopen
from bs4 import BeautifulSoup
import os

# Search keywords (one destination folder per keyword).
keywords = ['猫', '狗']
# Number of images to download per keyword.
max_nums = 100
# Root directory for the downloaded images.
save_dir = os.getcwd()
# Create one folder per keyword; exist_ok avoids the exists()/mkdir() race.
for kw in keywords:
    os.makedirs(os.path.join(save_dir, kw), exist_ok=True)
# Crawl the HTML result page and pull every <img src=...> thumbnail.
for kw in keywords:
    saved = 0  # images actually written for this keyword, capped at max_nums
    for offset in range(0, max_nums, 30):
        # quote() the keyword: urlopen raises UnicodeEncodeError on a raw
        # non-ASCII URL (the original interpolated 猫/狗 unescaped).
        url = f'https://image.baidu.com/search/index?tn=baiduimage&ipn=r&rsv_idx=2&word={quote(kw)}&pn={offset}&f=3'
        try:
            html = urlopen(url)
        except OSError:
            continue  # skip an unreachable result page
        # Name the parser explicitly; bare BeautifulSoup(html) warns and may
        # pick different parsers on different machines.
        soup = BeautifulSoup(html, 'html.parser')
        for tag in soup.find_all('img', {'src': True}):
            if saved >= max_nums:
                break
            image_url = tag['src']
            # Skip data: URIs and other non-HTTP placeholder sources.
            if not image_url.startswith(('http://', 'https://')):
                continue
            try:
                # Per-image counter in the filename: the original reused the
                # page offset for every image on a page, overwriting files.
                urlretrieve(image_url, os.path.join(save_dir, kw, f'{kw}_{saved}.jpg'))
                saved += 1
            except OSError:
                pass  # best-effort: a single failed download is not fatal
```
方法三:
```python
import requests
import re
import os

# Browser User-Agent: the original sent none, so Baidu may serve an
# anti-crawler page containing no "objURL" entries at all.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
# Search keywords (one destination folder per keyword).
keywords = ['猫', '狗']
# Number of images to download per keyword.
max_nums = 100
# Root directory for the downloaded images.
save_dir = os.getcwd()
# Create one folder per keyword; exist_ok avoids the exists()/mkdir() race.
for kw in keywords:
    os.makedirs(os.path.join(save_dir, kw), exist_ok=True)
# Ceiling division for the page count: the original
# range(1, max_nums // 30 + 1) fetched only 3 pages (90 images) for 100.
pages = (max_nums + 29) // 30
# Crawl: the "flip" page embeds full-size links as "objURL" in its HTML.
for kw in keywords:
    saved = 0  # images actually written for this keyword, capped at max_nums
    for page in range(pages):
        url = f'https://image.baidu.com/search/flip?tn=baiduimage&ie=utf-8&word={kw}&pn={page*30}&gsm=78'
        try:
            html = requests.get(url, headers=headers, timeout=10).text
        except requests.RequestException:
            continue  # skip an unreachable result page
        for image_url in re.findall('"objURL":"(.*?)",', html, re.S):
            if saved >= max_nums:
                break
            try:
                resp = requests.get(image_url, headers=headers, timeout=10)
                resp.raise_for_status()  # don't save 404/anti-hotlink pages as .jpg
            except requests.RequestException:
                continue
            with open(os.path.join(save_dir, kw, f'{kw}_{saved}.jpg'), 'wb') as f:
                f.write(resp.content)
            saved += 1
```
方法四:
```python
import urllib.parse
import urllib.request
import os
import json

# Browser User-Agent: the original sent none, and the acjson endpoint
# commonly rejects bare urllib clients.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
# Search keywords (one destination folder per keyword).
keywords = ['猫', '狗']
# Number of images to download per keyword.
max_nums = 100
# Root directory for the downloaded images.
save_dir = os.getcwd()
# Create one folder per keyword; exist_ok avoids the exists()/mkdir() race.
for kw in keywords:
    os.makedirs(os.path.join(save_dir, kw), exist_ok=True)
# Ceiling division for the page count: the original
# range(1, max_nums // 30 + 1) fetched only 3 pages (90 images) for 100.
pages = (max_nums + 29) // 30
# Crawl Baidu's JSON search API and download each thumbnail.
for kw in keywords:
    saved = 0  # images actually written for this keyword, capped at max_nums
    for page in range(pages):
        url = f'https://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj&ct=201326592&is=&fp=result&queryWord={urllib.parse.quote(kw)}&cl=2&lm=-1&ie=utf-8&oe=utf-8&adpicid=&st=&z=&ic=&word={urllib.parse.quote(kw)}&s=&se=&tab=&width=&height=&face=&istype=&qc=&nc=1&fr=&pn={page*30}&rn=30&gsm=1e&1521863315457='
        try:
            request = urllib.request.Request(url, headers=headers)
            with urllib.request.urlopen(request, timeout=10) as resp:
                payload = resp.read().decode('utf-8')
            # NOTE(review): Baidu occasionally emits invalid \' escapes in this
            # JSON; json.loads then raises ValueError and the page is skipped.
            entries = json.loads(payload).get('data', [])
        except (OSError, ValueError):
            continue  # skip a result page that fails to load or parse
        for info in entries:
            if saved >= max_nums:
                break
            image_url = info.get('thumbURL')
            if not image_url:
                continue  # the trailing element of 'data' is an empty dict
            try:
                req = urllib.request.Request(image_url, headers=headers)
                with urllib.request.urlopen(req, timeout=10) as img:
                    content = img.read()
            except OSError:
                continue  # best-effort: a single failed download is not fatal
            with open(os.path.join(save_dir, kw, f'{kw}_{saved}.jpg'), 'wb') as f:
                f.write(content)
            saved += 1
```
以上四种代码均可实现用Python爬取百度图片中猫和狗的图片各100张,并将它们分别保存到本地cat和dog文件夹的功能。
阅读全文