from bs4 import BeautifulSoup import requests url='http://pic.netbian.com/4kqiche/' headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'} resp=requests.get(url=url, headers=headers,verify=False) soup = BeautifulSoup(resp.text, 'lxml') resp.encoding='gbk' div=soup.find_all('div',class_='clearfix') for divs in div: href='http://pic.netbian.com/tupian/31686.html'+divs.find('a')['href'] resp2=requests.get(url=url, headers=headers,verify=False) soup2=BeautifulSoup(resp2.text,'lxml') resp2.encoding='gbk' soup3=BeautifulSoup(resp2.text,'lxml') title=soup2.find('div',class_='photo-pic').find('img')['src'] title_name=title.split('/')[-1] addhref='http://pic.netbian.com'+title resp3=requests.get(url=title,headers=headers,verify=False) with open('img', 'wb') as f: f.write(resp3) f.close()改正这段代码
时间: 2024-01-11 11:03:26 浏览: 136
python爬虫练习:爬图片
以下是改正后的代码:
```
from bs4 import BeautifulSoup
import requests
# Scrape the listing page below and, for each matching container, follow its
# first link to the detail page and save that page's preview image into the
# current working directory under the image's own file name.
url = 'http://pic.netbian.com/4kqiche/'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}

# The encoding must be assigned BEFORE resp.text is read: requests decodes the
# body with whatever resp.encoding holds at the moment .text is accessed, so
# setting it after building the soup has no effect on the parsed document.
resp = requests.get(url=url, headers=headers, verify=False)
resp.encoding = 'gbk'
soup = BeautifulSoup(resp.text, 'lxml')

div = soup.find_all('div', class_='clearfix')
for divs in div:
    # Skip containers without a usable link instead of crashing on None.
    link = divs.find('a')
    if link is None or not link.get('href'):
        continue

    # Join the link with the site root, not with another page's full URL.
    # assumes the href is site-relative (starts with '/') — TODO confirm
    href = 'http://pic.netbian.com' + link['href']
    resp2 = requests.get(url=href, headers=headers, verify=False)
    resp2.encoding = 'gbk'
    soup2 = BeautifulSoup(resp2.text, 'lxml')

    # Pages that are not picture detail pages have no photo container.
    photo = soup2.find('div', class_='photo-pic')
    img = photo.find('img') if photo is not None else None
    if img is None or not img.get('src'):
        continue

    title = img['src']
    title_name = title.split('/')[-1]
    addhref = 'http://pic.netbian.com' + title
    resp3 = requests.get(url=addhref, headers=headers, verify=False)
    # Do not store an HTTP error page on disk as if it were an image.
    if not resp3.ok:
        continue

    # The with-statement closes the file; no explicit close() is needed.
    with open(title_name, 'wb') as f:
        f.write(resp3.content)
```
改正的地方主要有:
1. 将发送 GET 请求的 url 参数改为 href,以获取正确的网页内容。
2. 修改保存图片的文件名,将 'img' 改为 title_name。
3. 在下载图片时,将请求地址由 title 改为拼接了域名的 addhref;写入文件时将 resp3 改为 resp3.content,以获取图片的二进制数据。
阅读全文