# Web element extraction.
#
# Original question: how can this code be modified so that print produces
# the following output?
#   图像列表中,png图像文件为:20180112025424524.png 对应的URL为:http://image.nationalgeographic.com.cn/2018/0112/20180112025424524.png
# showResults below prints exactly that line for a matching URL.


def getHTML(htmlpath):
    """Read an HTML file and return its content as a list of lines.

    Args:
        htmlpath: Path of the HTML file; it is decoded as UTF-8.

    Returns:
        The lines of the file, each keeping its trailing newline.
    """
    # The context manager closes the file even if reading raises.
    with open(htmlpath, 'r', encoding='utf-8') as f:
        return f.readlines()


def extractImageUrls(htmllist):
    """Extract image URLs from the lines of an HTML document.

    Only lines containing ``img`` are inspected. The candidate URL is the
    first double-quoted value after the last ``src=`` on the line, and it
    is kept only when it contains ``http``.

    Args:
        htmllist: Iterable of HTML source lines.

    Returns:
        A list of the URLs found, in document order.
    """
    urls = []
    for line in htmllist:
        if 'img' not in line:  # images are marked up with the img tag
            continue
        parts = line.split('src=')[-1].split('"')
        if len(parts) < 2:
            # "img" appears but nothing is quoted: no URL to take, and
            # indexing parts[1] would raise IndexError.
            continue
        url = parts[1]
        if 'http' in url:  # only absolute http(s) links are wanted
            urls.append(url)
    return urls


def showResults(urls, name='20180112025424524.png'):
    """Print the URL of the image file called ``name``.

    Args:
        urls: Iterable of URL strings.
        name: File name to look for inside each URL.
    """
    for url in urls:
        # Test the file name against each URL string; testing it against
        # the list itself only matches an element equal to the bare name.
        if name in url:
            print('图像列表中,png图像文件为:{} 对应的URL为:{}'.format(
                name, url.strip()))


def saveResults(filepath, urls):
    """Write the URLs to ``filepath``, one per line, encoded as UTF-8.

    Args:
        filepath: Path of the output text file (overwritten if present).
        urls: Iterable of URL strings.
    """
    with open(filepath, 'w', encoding='utf-8') as f:
        f.writelines(url + '\n' for url in urls)


def main():
    """Extract the image URLs from the input page, show and save them."""
    # Raw string: in a normal literal "\U" is an invalid unicode escape
    # (SyntaxError) and "\n" would be read as a newline.
    inputfile = r"C:\Users\86183\Downloads\nationalgeographic.html"
    outputfile = 'nationalgeographic.txt'
    htmlLines = getHTML(inputfile)
    imagUrls = extractImageUrls(htmlLines)
    showResults(imagUrls)
    saveResults(outputfile, imagUrls)


if __name__ == '__main__':
    main()
时间: 2024-03-14 09:50:09 浏览: 94
GetHtml.rar_Gethtml_delphi ie html_gethtml.rar _html
您可以将showResults函数修改为以下代码,即逐个检查每个URL字符串中是否包含该文件名,匹配时再输出对应的URL:
```
def showResults(urls, name='20180112025424524.png'):
    """Print the URL of the image file called ``name``.

    Args:
        urls: Iterable of URL strings.
        name: File name to look for inside each URL.
    """
    for url in urls:
        # Substring test on each URL string, not membership in the list.
        if name in url:
            print("图像列表中,png图像文件为:{} 对应的URL为:{}".format(name, url))
```
阅读全文