# Import urlretrieve from wherever the running interpreter provides it:
# Python 3 exposes it as urllib.request.urlretrieve, Python 2 as
# urllib.urlretrieve.
# The check uses sys.version_info (a tuple of ints) instead of indexing the
# human-readable sys.version string, which is not meant to be parsed.
if sys.version_info[0] >= 3:
    from urllib.request import urlretrieve
else:
    from urllib import urlretrieve

这段代码用来根据 Python 版本导入对应模块中的 urlretrieve 函数，以便下载文件。如果运行在 Python 3 上，就从 urllib.request 模块中导入 urlretrieve 函数；否则（即 Python 2）从 urllib 模块中导入 urlretrieve 函数，因为该函数在 Python 3 中被移到了 urllib.request 模块。这样可以保证代码在不同版本的 Python 中都能正常运行。

# NOTE(review): this snippet was pasted with all line breaks and indentation
# removed; the layout below is reconstructed from the syntax and should be
# confirmed against the author's original file.
import requests
import urllib.request
import os


def quest_find(quest_url, awme_id):
    """Query ``quest_url`` for one id and return the response's data and code.

    Sends a GET request with ``awme_id`` as the ``id`` query parameter,
    decodes the body as JSON, and returns the ``"data"`` and ``"code"``
    fields as a 2-tuple.
    """
    params = {"id": awme_id}
    respon = requests.get(quest_url, params=params).json()
    return respon["data"], respon["code"]


def re_down(url,filename):
    """Download ``url`` to ``filename``, retrying on a truncated download.

    When urlretrieve raises ContentTooShortError, a message is printed and
    the function calls itself again with the same arguments. There is no
    retry limit, so a persistently short download recurses without bound.
    """
    try:
        urllib.request.urlretrieve(url,filename)
    except urllib.error.ContentTooShortError:
        print ('Network conditions is not good. Reloading...')
        re_down(url,filename)


# Get the video URLs and download them.
# NOTE(review): presumably `if __name__ == '__main__':` with the underscores
# lost when the post was rendered; as written, `name` is undefined.
if name == 'main':
    quest_url = "http://discover-rpc.cmm-crawler-intranet.k8s.limayao.com/play_url"
    # NOTE(review): `time` is not assigned until the loop below, so this line
    # raises NameError when reached.
    save_path = "/home/algodev/sujunbin/whisper/test_model/video%s" %time
    if not os.path.exists(save_path):
        os.mkdir(save_path)
    awme_ids = ['7119114587735100687']
    with open('id_time.txt','r') as file:
        # Each line is split on whitespace: first field is the id, second is
        # an integer that selects the bucket label below (presumably a
        # duration in milliseconds, given the "10s" label for < 10000).
        for line in file.readlines():
            line = line.split()
            id = line[0]
            time1 = int(line[1])
            # Map the integer onto a bucket label used in the folder name.
            if time1<10000:
                time ='<10s'
            elif 10000<=time1<20000:
                time='10-20s'
            elif 20000<=time1<30000:
                time='20-30s'
            elif 30000<=time1<40000:
                time='30-40s'
            elif 40000<=time1<50000:
                time='40-50s'
            elif 50000<=time1<60000:
                time='50-60s'
            elif 60000<=time1<90000:
                time='60-90s'
            elif 90000<=time1<120000:
                time='90-120s'
            elif 120000<=time1<180000:
                time='120-180s'
            elif time1>=180000:
                time='>180s'
            # One output folder per bucket, created on first use.
            save_path = "/home/algodev/sujunbin/whisper/test_model/video%s" %time
            if not os.path.exists(save_path):
                os.mkdir(save_path)
            data_json, code = quest_find(quest_url, id)
            play_url = data_json['play_url']
            video_name = id + '.mp4'
            save_video_path = os.path.join(save_path, video_name)
            re_down(data_json['play_url'], save_video_path)
            print(save_video_path)
    # The hard-coded ids are saved into whatever save_path was set last, and
    # are fetched with a plain urlretrieve call rather than re_down.
    for i in range(len(awme_ids)):
        data_json, code = quest_find(quest_url, awme_ids[i])
        play_url = data_json['play_url']
        video_name = awme_ids[i] + '.mp4'
        save_video_path = os.path.join(save_path, video_name)
        urllib.request.urlretrieve(data_json['play_url'], save_video_path)
        print(save_video_path)
    print("done!")
这段代码有什么问题

这段代码可能存在以下问题：
1. 变量 `time` 在赋值之前就被使用：第一次拼接 `save_path` 时，`time` 还没有被赋值，会抛出 NameError。注意这里需要的是表示时长区间的字符串，仅仅添加 `import time` 并不能得到正确的路径，应该去掉这一处提前的拼接，或先给 `time` 一个合适的初始值。
2. 代码中的变量名 `awme_ids` 可能有误，应该改为 `aweme_ids`。
3. 在 `re_down()` 函数中，若下载失败会无限递归调用自身，可能导致内存溢出。建议添加一个计数器，若连续下载失败次数超过一定阈值，则放弃下载。
4. 在循环过程中，多次声明 `save_path` 变量，覆盖了之前的赋值，应该在循环外部声明。
5. 可能存在无法下载视频的情况，需要添加异常处理机制，例如 `urllib.error.HTTPError`、`urllib.error.URLError`、`socket.timeout` 等异常。
6. 代码中的请求 URL 是内网地址，无法在公网环境下使用。
7. 可能存在多个视频对应同一个保存路径的情况，应该在视频名中添加一个唯一标识，例如时间戳或视频 ID。

建议在代码中添加适当的注释和异常处理，以提高代码的可读性和健壮性。

import requests
from lxml import etree
import os
from urllib.request import urlretrieve
import sys

# Fetch the game-category index page and extract, for every game card,
# its numeric id (data-gid attribute) and its display name.
url = 'https://www.huya.com/g'
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36'}
response = requests.get(url, headers=headers).content.decode()
html = etree.HTML(response)
req = html.xpath('//li[@class="g-gameCard-item"]//a//@data-gid')
names = html.xpath('//li[@class="g-gameCard-item"]//a//p/text()')

# Append a keyword -> id table to a log file, five entries per row. This is
# only for the user's convenience when looking up keywords. The `with`
# block closes the file even if a write fails.
with open('关键字大全.log', mode='a', encoding='utf-8') as mylog:
    print(' -----------查找关键字大全-----------', file=mylog)
    for count, (gid, name) in enumerate(zip(req, names), start=1):
        print('关键字:%s 查找id号:%s' % (name, gid), end=' ', file=mylog)
        if count % 5 == 0:
            print('\n', file=mylog)

name_1 = input('\n请输入查找关键字：')

# Look up the id of the first game whose name matches the input. Using a
# None default avoids relying on a leftover loop variable, which would be
# undefined if the page yielded no game cards at all.
gid = next((g for g, n in zip(req, names) if n == name_1), None)

if gid is None:
    print('没有你想查找的关键字')
else:
    url = 'https://www.huya.com/g/{}#tag0'.format(gid)
    data = requests.get(url, headers=headers).content.decode()
    html = etree.HTML(data)
    # Image links
    link_list = html.xpath('//a/img[@class="pic"]/@data-original')
    # Names used for the saved files
    names_1_list = html.xpath('//span[@class="avatar fl"]//img/@title')
    # Output folder named after the chosen keyword; exist_ok lets the
    # script be re-run for the same keyword without crashing.
    os.makedirs(name_1, exist_ok=True)
    for link, names_1 in zip(link_list, names_1_list):
        # Drop the query string to get the bare image URL.
        link = link.split('?')[0]
        try:
            # Save the image
            urlretrieve(link, os.path.join(name_1, names_1 + '.jpg'))
            print(names_1 + '100%')
        except (OSError, ValueError) as exc:
            # Downloading stays best-effort: report the failure and move on
            # to the next image instead of silently swallowing every error.
            # URLError/HTTPError are OSError subclasses; ValueError covers
            # malformed URLs.
            print('%s 下载失败: %s' % (names_1, exc))

这段代码是一个Python爬虫程序，用于从虎牙直播网站上爬取游戏直播分类下的主播头像，保存到本地文件夹中。具体实现过程如下：
1. 导入requests、lxml、os和urllib库，用于发送http请求、解析html文档、创建文件夹和下载图片。
2. 设置headers，伪装成浏览器的请求头，避免被服务器识别为爬虫。
3. 发送http请求，获取游戏直播分类下的html文档。
4. 使用xpath解析html文档，获取所有游戏id和游戏名字。
5. 将关键字和对应的id保存到本地记事本中，以便后续查看。
6. 等待用户输入要查找的关键字，根据关键字查找对应的游戏id。
7. 根据游戏id构造新的url，重新发送http请求，获取游戏主播列表的html文档。
8. 使用xpath解析html文档，获取所有主播头像的链接和主播名字。
9. 根据所选游戏的名字（即输入的关键字）创建文件夹，遍历所有主播头像链接，下载头像并以主播名字命名，保存到该文件夹中。

需要注意的是，这段代码仅供学习和参考使用，对于未经授权的爬取行为，请勿使用。同时，爬取过程中应当遵守相关法律法规和网站的使用协议，不得侵犯他人的隐私和知识产权。

阅读全文

# Import urlretrieve from wherever the running interpreter provides it:
# Python 3 exposes it as urllib.request.urlretrieve, Python 2 as
# urllib.urlretrieve.
# The check uses sys.version_info (a tuple of ints) instead of indexing the
# human-readable sys.version string, which is not meant to be parsed.
if sys.version_info[0] >= 3:
    from urllib.request import urlretrieve
else:
    from urllib import urlretrieve

相关推荐

使用urllib库的urlretrieve()方法下载网络文件到本地的方法

Python urllib模块urlopen()与urlretrieve()详解

Python urllib.request对象案例解析

urllib.request.urlretrieve

urllib.request.urlretrieve详解

urllib.request.urlretrieve参数

urllib.request.urlretrieve 如果404会报错

urllib.request.urlretrieve下载文件不完整

urllib.request.urlretrieve(url, filename)

urllib.urlretrieve

attributeerror: module 'urllib' has no attribute 'urlretrieve'

AttributeError: module 'urllib' has no attribute 'urlretrieve'

大家在看

华为CloudIVS 3000技术主打胶片v1.0（C20190226）.pdf

dosbox:适用于Android的DosBox Turbo FreeBox

功率谱密度：时间历程的功率谱密度。-matlab开发

南京工业大学Python程序设计语言题库及答案

Windows6.1--KB2533623-x64.zip

最新推荐

Windows下操作Linux图形界面的VNC工具

【SketchUp Ruby API：从入门到精通】

VMware虚拟机打开虚拟网络编辑器出现由于找不到vnetlib.dll,无法继续执行代码。重新安装程序可能会解决问题

基于Preact的高性能PWA实现定期天气信息更新

从停机到上线，EMC VNX5100控制器SP更换的实战演练

ubuntu labelme中文版安装

全新免费HTML5商业网站模板发布

EMC VNX5100控制器SP更换全流程指南：新手到高手的必备技能

lamada函数

快速掌握C++ STL：30秒学会核心功能