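# Task: write Python code that crawls 100 cat pictures and 100 dog pictures
# from Baidu Images and stores them in matching folders named `cat` and `dog`.
# Posted: 2024-06-10 22:04:45 | Views: 127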
import os
import re

import requests
def download_image(url, save_path, timeout=10):
    """Download the resource at ``url`` and write it to ``save_path``.

    Args:
        url: Address of the image to fetch.
        save_path: Destination file path; an existing file is overwritten.
        timeout: Seconds to wait for the server before giving up, so one
            stalled host cannot hang the whole crawl.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        OSError: If the destination file cannot be written.
    """
    # The context manager releases the connection even when an error occurs.
    with requests.get(url, timeout=timeout) as response:
        # Fail before touching the disk so an HTTP error page is never
        # saved under an image file name.
        response.raise_for_status()
        data = response.content

    with open(save_path, 'wb') as out_file:
        out_file.write(data)
def _extract_image_urls(html):
    """Return every non-empty ``"objURL"`` value found in ``html``, in order."""
    return [url for url in re.findall(r'"objURL":"([^"]*)"', html) if url]


def download_images(query, folder, num_images):
    """Search for ``query`` and save up to ``num_images`` pictures in ``folder``.

    Files are named ``0.jpg``, ``1.jpg``, ... in download order. A picture
    that fails to download is skipped and later search results are used to
    make up the shortfall where possible.

    Args:
        query: Search keyword.
        folder: Directory that receives the pictures; created when missing.
        num_images: Maximum number of pictures to save.

    Returns:
        The number of pictures actually saved. This is lower than
        ``num_images`` when the search runs out of new results.

    Raises:
        requests.RequestException: If a search-result page cannot be fetched.
    """
    os.makedirs(folder, exist_ok=True)
    if num_images <= 0:
        return 0

    # NOTE(review): the image-search "flip" page is assumed to embed results
    # as "objURL":"..." pairs and to treat `pn` as a result offset; confirm
    # against the live site before relying on this.
    search_url = 'https://image.baidu.com/search/flip'
    headers = {'User-Agent': 'Mozilla/5.0'}
    # Each query field is its own key so that requests URL-encodes it properly.
    params = {'tn': 'baiduimage', 'ie': 'utf-8', 'word': query, 'pn': 0}

    seen = set()
    count = 0
    while count < num_images:
        with requests.get(search_url, params=params, headers=headers,
                          timeout=10) as response:
            response.raise_for_status()
            html = response.content.decode('utf-8', errors='replace')

        page_urls = _extract_image_urls(html)
        # dict.fromkeys drops duplicates within the page but keeps the order.
        fresh = [url for url in dict.fromkeys(page_urls) if url not in seen]
        if not fresh:
            # Nothing new on this page: the results are exhausted. Stop here
            # instead of requesting the same page forever.
            break
        seen.update(fresh)
        params['pn'] += len(page_urls)

        for img_url in fresh:
            save_path = os.path.join(folder, '{0}.jpg'.format(count))
            try:
                download_image(img_url, save_path)
            except (requests.RequestException, OSError):
                # Best effort: dead links are common, so skip this picture.
                # The index is not advanced, so the next success reuses it.
                continue
            count += 1
            if count % 10 == 0:
                print('已下载 {0} 张...'.format(count))
            if count == num_images:
                break

    return count
if __name__ == '__main__':
    # One (search keyword, target folder) pair per animal, 100 pictures each.
    for keyword, target_folder in (('猫', 'cat'), ('狗', 'dog')):
        download_images(keyword, target_folder, 100)
    print('图片下载完成!')
# Read the full article