```python
import os
import requests
from bs4 import BeautifulSoup

# 创建目录用于保存图片
if not os.path.exists('data'):
    os.mkdir('data')

# 爬取网站
url = 'https://www.sucai999.com/pic/cate/263_267.html'
response = requests.get(url)
soup = BeautifulSoup(response.text, 'html.parser')

# 获取图片链接并下载保存
img_tags = soup.find_all('img', class_='lazy')
for index, img_tag in enumerate(img_tags[:20]):
    img_url = img_tag['data-original']
    response = requests.get(img_url)
    with open(f'data/img{index}.jpg', 'wb') as f:
        f.write(response.content)
print('图片保存完毕!')
```
请把这段代码优化成连续保存20张图片的代码。
时间: 2023-06-24 22:07:39 浏览: 50
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Listing page to scrape, number of images to keep, and per-request timeout
# (seconds) so a stalled connection cannot hang the script forever.
url = 'https://www.sucai999.com/pic/cate/263_267.html'
max_images = 20
timeout = 10

# Create the output directory; exist_ok avoids the check-then-create race.
os.makedirs('data', exist_ok=True)

# Fetch and parse the listing page; fail loudly on an HTTP error instead of
# silently parsing an error page.
response = requests.get(url, timeout=timeout)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# The script looks for <img class="lazy"> tags and reads the image address
# from their `data-original` attribute.
img_tags = soup.find_all('img', class_='lazy')

# Walk through every candidate tag and keep going until `max_images` files
# have actually been written, so one bad tag or failed download does not
# leave a gap in the numbering.
saved = 0
for img_tag in img_tags:
    if saved >= max_images:
        break

    img_src = img_tag.get('data-original')
    if not img_src:
        # Tag without a lazy-load address: nothing to download.
        continue

    # Resolve relative or protocol-relative addresses against the page URL.
    img_url = urljoin(url, img_src)
    try:
        img_response = requests.get(img_url, timeout=timeout)
        img_response.raise_for_status()
    except requests.RequestException as exc:
        print(f'跳过 {img_url}: {exc}')
        continue

    with open(f'data/img{saved}.jpg', 'wb') as f:
        f.write(img_response.content)
    saved += 1
    print(f'第{saved}张图片保存完毕!')
相关问题
python爬电影排名用os bs4_编写python脚本利用requests+bs4模块爬取豆瓣电影top250的数据...
好的,您可以参考以下代码实现(运行前需先安装 requests、beautifulsoup4 和 lxml,因为代码使用了 'lxml' 解析器):
```python
import requests
from bs4 import BeautifulSoup
import os
# Fetch the HTML of a Douban Top 250 page.
def get_html(url):
    """Download *url* and return the response body as text.

    A browser-like User-Agent header is sent with the request.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response text when the server answers with HTTP 200, otherwise
        ``None`` (non-200 status or any ``requests`` error).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, headers=headers, timeout=10)
    except requests.RequestException:
        return None
    if response.status_code == 200:
        return response.text
    # Make the non-200 outcome explicit instead of falling off the end.
    return None
# Parse the page and extract the movie entries.
def parse_html(html):
    """Extract movie records from the HTML of a Top 250 page.

    Args:
        html: Page source as a string, or ``None``/empty when the download
            failed.

    Returns:
        A list of dicts with the keys ``rank``, ``title``, ``score`` and
        ``quote``, one per ``<div class="item">`` found. An empty list is
        returned when *html* is ``None`` or empty. ``quote`` is an empty
        string for entries that have no ``<span class="inq">``.
    """
    # The downloader can return None; there is nothing to parse in that case.
    if not html:
        return []

    soup = BeautifulSoup(html, 'lxml')
    movies = []
    for item in soup.find_all('div', class_='item'):
        # find() returns None when the span is missing, so guard before
        # reading .text to avoid an AttributeError on such entries.
        quote_tag = item.find('span', class_='inq')
        movies.append({
            'rank': item.em.text,
            'title': item.find('span', class_='title').text,
            'score': item.find('span', class_='rating_num').text,
            'quote': quote_tag.text if quote_tag is not None else '',
        })
    return movies
# Save the movie records to a local text file.
def save_to_file(movies):
    """Write *movies* to ``./douban_movies/top250.txt`` (UTF-8).

    The output directory is created if it does not exist, and the file is
    overwritten on every call.

    Args:
        movies: Iterable of dicts with the keys ``rank``, ``title``,
            ``score`` and ``quote``.
    """
    # exist_ok avoids the race between checking for the directory and
    # creating it.
    os.makedirs('./douban_movies', exist_ok=True)
    with open('./douban_movies/top250.txt', 'w', encoding='utf-8') as f:
        for movie in movies:
            f.write('排名:{}\n电影名称:{}\n评分:{}\n简介:{}\n\n'.format(movie['rank'], movie['title'], movie['score'], movie['quote']))
# Scrape the Douban Top 250 data.
def main():
    """Fetch every page of the Top 250 list, parse it and save the result.

    The list is requested 25 entries at a time through the ``start`` query
    parameter (0, 25, ..., 225), so all 250 entries are collected instead of
    only the first page. Pages that fail to download are reported and
    skipped; whatever was collected is still saved.
    """
    base_url = 'https://movie.douban.com/top250'
    page_size = 25
    total = 250

    movies = []
    for start in range(0, total, page_size):
        html = get_html('{}?start={}'.format(base_url, start))
        if html is None:
            # The downloader signals failure with None; do not pass it on to
            # the parser.
            print('第{}页获取失败,已跳过'.format(start // page_size + 1))
            continue
        movies.extend(parse_html(html))
    save_to_file(movies)


if __name__ == '__main__':
    main()
```
该脚本会爬取豆瓣电影top250的排名、电影名称、评分和简介,并将其保存到本地文件中。您可以根据自己的需求进行修改。
爬取学校官网校园风光的图片,输出并保存 https://www.chzc.edu.cn/xyfg/
我可以为您提供Python代码来实现该功能。您需要安装requests和beautifulsoup4库来运行此代码。请在终端或命令提示符中运行以下命令来安装这些库:
```
pip install requests
pip install beautifulsoup4
```
代码如下:
```python
import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

# Page to scrape
url = 'https://www.chzc.edu.cn/xyfg/'
# Per-request timeout in seconds, so a stalled connection cannot hang forever
timeout = 10

# Send the request
response = requests.get(url, timeout=timeout)
response.raise_for_status()
response.encoding = 'utf-8'

# Parse the page content
soup = BeautifulSoup(response.text, 'html.parser')

# Collect every <img> tag
img_tags = soup.find_all('img')

# Create the output folder; exist_ok avoids the check-then-create race
os.makedirs('xyfg', exist_ok=True)

# Download and save each image
used_names = set()
for index, img_tag in enumerate(img_tags):
    src = img_tag.get('src')
    if not src:
        # An <img> without src has nothing to download.
        continue

    # urljoin handles root-relative, page-relative and absolute src values;
    # plain string concatenation only works for root-relative ones.
    img_url = urljoin(url, src)

    # Build the file name from the alt text, replacing characters that are
    # not valid in file names. Fall back to a numbered name when alt is
    # missing or empty, and add the index when the name was already used so
    # earlier downloads are not overwritten.
    alt = re.sub(r'[\\/:*?"<>|\s]+', '_', img_tag.get('alt') or '').strip('_')
    stem = alt or 'image_{}'.format(index)
    if stem in used_names:
        stem = '{}_{}'.format(stem, index)
    used_names.add(stem)
    img_name = stem + '.jpg'
    img_path = os.path.join('xyfg', img_name)

    # Download before opening the file so a failed request does not leave
    # an empty file behind.
    try:
        img_response = requests.get(img_url, timeout=timeout)
        img_response.raise_for_status()
    except requests.RequestException as exc:
        print('下载失败,已跳过:', img_url, exc)
        continue

    with open(img_path, 'wb') as f:
        f.write(img_response.content)
    print('已保存图片:', img_name)
```
该代码会将网页中所有的图片下载并保存到一个名为“xyfg”的文件夹中。您可以根据需要修改代码中的文件夹名称。