运行下面代码，运行结果没有保存文件，请帮我找出原因 # -*- coding: utf-8 -*- import urllib.request import re def getNovertContent(): url = 'http://www.quannovel.com/read/640/' req = urllib.request.Request(url) req.add_header('User-Agent', ' Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36') data = urllib.request.urlopen(req).read().decode('gbk') str1 = str(data) # 将网页数据转换为字符串 reg = r'<li><a href="(.*?)" title=".*?">(.*?)</a><li>' reg = re.compile(reg) urls = reg.findall(str1) for url in urls: novel_url = url[0] novel_title = url[1] chapt = urllib.request.urlopen(novel_url).read() chapt_html = chapt.decode('gbk') reg = r'</script> (.*?)</script type="text/javascript">' reg = re.compile(reg, re.S) chapt_content = reg.findall(chapt_html) chapt_content = chapt_content[0].replace(" ", "") chapt_content = chapt_content.replace("<br />", "") print("正在保存 %s" % novel_title) with open("{}.txt".format(novel_title), 'w', encoding='utf-8') as f: f.write(chapt_content) getNovertContent()

修改后，运行下面代码运行后文档内只有列索引，没有内容，请帮我找一下原因 # -*- coding: utf-8 -*- import urllib.request import re import pandas as pd import time def getdata(url): req = urllib.request.Request(url) req.add_header('User-Agent', ' Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36') data = urllib.request.urlopen(req).read().decode('utf-8') str1 = str(data) pat = ''' (.*?) (.*?) (.*?) (.*?) (.*?) ''' result = re.compile(pat).findall(str1) return result if __name__ == '__main__': for i in range(2018, 2020): print('正在收集第%d年数据' % i) rank = [] country = [] zhou = [] total = [] url = 'https://www.kuaiyilicai.com/stats/global/yearly/g_gdp/' + str(i) + '.html' data = getdata(url) for j in range(0, len(data)): rank.append(data[j][0]) country.append(data[j][1]) zhou.append(data[j][2]) total.append(data[j][3]) dataframe = pd.DataFrame({'排名': rank, '国家/地区': country, '所在洲': zhou, 'GDP(美元计)': total}) with open(str(i) + "年世界gdp排名.csv", mode="w", encoding="utf_8_sig") as f: dataframe.to_csv(f, index=False, sep=',') print(i, '年数据收集完成')

with open(str(i) + "年世界gdp排名.csv", mode="w", encoding="utf_8_sig") as f: dataframe.to_csv(f, index=False, header=True, sep=',') 如果问题仍然存在，你可以尝试检查生成的 pandas.DataFrame ...

下面代码运行后文档内只有标题，没有内容，请帮我找一下原因 # -*- coding: utf-8 -*- import urllib.request import re import pandas as pd import time def getdata(url): req = urllib.request.Request(url) req.add_header('User-Agent', ' Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36') data = urllib.request.urlopen(req).read().decode('utf-8') str1 = str(data) pat = ''' (.*?) (.*?) (.*?) (.*?) .? ''' result = re.compile(pat).findall(str1) return result if __name__ == '__main__': for i in range(2018, 2020): print('正在收集第%d年数据' % i) rank = [] country = [] zhou = [] total = [] url = 'https://www.kuaiyilicai.com/stats/global/yearly/g_gdp/' + str(i) + '.html' data = getdata(url) for j in range(0, len(data)): rank.append(data[j][0]) country.append(data[j][1]) zhou.append(data[j][2]) total.append(data[j][3]) dataframe = pd.DataFrame({'排名': rank, '国家/地区': country, '所在洲': zhou, 'GDP(美元计)': total}) with open(str(i) + "年世界gdp排名.csv", mode="w", encoding="utf_8_sig") as f: dataframe.to_csv(f, index=False, sep=',') print(i, '年数据收集完成')

这个代码的问题可能是在正则表达式的匹配上。你可以把这一行代码： pat = '''<tr> (.*?)</td> (.*?)</td> (.*?)</td> (.*?)</td> <td>.?</td> </tr>''' 修改为： pat = '''<tr> (.*?)</td> (.*?)...

# -*- coding: utf-8 -*- import urllib.request import re def getNovertContent(): url = 'http://www.quannovel.com/read/640/' req = urllib.request.Request(url) req.add_header('User-Agent', ' Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36') data = urllib.request.urlopen(req).read().decode('gbk') str1 = str(data) # 将网页数据转换为字符串 reg = r'<li><a href="(.*?)" title=".*?">(.*?)</a><li>' reg = re.compile(reg) urls = reg.findall(str1) for url in urls: novel_url = url[0] novel_title = url[1] chapt = urllib.request.urlopen(novel_url).read() chapt_html = chapt.decode('gbk') reg = r'</script> (.*?)</script type="text/javascript">' reg = re.compile(reg, re.S) chapt_content = reg.findall(chapt_html) chapt_content = chapt_content[0].replace( " ", "") chapt_content = chapt_content.replace("<br />", "") print("正在保存 %s" % novel_title) with open("{}.txt".format(novel_title), 'w', encoding='utf-8') as f: f.write(chapt_content) getNovertContent()

这段代码是用Python编写的，其功能是从一个小说网站上爬取小说的章节内容，并保存为txt文件。具体的实现过程如下： 1. 首先导入了两个Python标准库urllib和re。其中，urllib库用于进行网络请求和数据下载，re库用于...

这段代码是干什么用的# -*- coding: utf-8 -*- import time import uuid import hashlib import base64 import ssl import urllib.request import hmac from hashlib import sha256 # 必填,请参考"开发准备"获取如下数据,替换为实际值 realUrl = 'https://rtcpns.cn-north-1.myhuaweicloud.com/rest/caas/relationnumber/partners/v1.0' #APP接入地址+接口访问URI APP_KEY = "a1****" #APP_Key APP_SECRET = "cfc8" #APP_Secret ''' 选填,各参数要求请参考"AXB模式解绑接口" subscriptionId和relationNum为二选一关系,两者都携带时以subscriptionId为准 ''' subscriptionId = '' #指定"AXB模式绑定接口"返回的绑定ID进行解绑 relationNum = '+861700001' #指定X号码(隐私号码)进行解绑 def buildAKSKHeader(appKey, appSecret): now = time.strftime('%Y-%m-%dT%H:%M:%SZ') #Created nonce = str(uuid.uuid4()).replace('-','') #Nonce digist = hmac.new(appSecret.encode(), (nonce + now).encode(), digestmod=sha256).digest() digestBase64 = base64.b64encode(digist).decode() #PasswordDigest return 'UsernameToken Username="{}",PasswordDigest="{}",Nonce="{}",Created="{}"'.format(appKey, digestBase64, nonce, now); def main(): # 请求URL参数 formData = urllib.parse.urlencode({ 'subscriptionId':subscriptionId, 'relationNum':relationNum }) #完整请求地址 fullUrl = realUrl + '?' + formData req = urllib.request.Request(url=fullUrl, method='DELETE') #请求方法为DELETE # 请求Headers参数 req.add_header('Authorization', 'AKSK realm="SDP",profile="UsernameToken",type="Appkey"') req.add_header('X-AKSK', buildAKSKHeader(APP_KEY, APP_SECRET)) req.add_header('Content-Type', 'application/json;charset=UTF-8') # 为防止因HTTPS证书认证失败造成API调用失败,需要先忽略证书信任问题 ssl._create_default_https_context = ssl._create_unverified_context try: print(formData) #打印请求数据 r = urllib.request.urlopen(req) #发送请求 print(r.read().decode('utf-8')) #打印响应结果 except urllib.error.HTTPError as e: print(e.code) print(e.read().decode('utf-8')) #打印错误信息 except urllib.error.URLError as e: print(e.reason) if __name__ == '__main__': main()

这段代码实现的是使用 Python 发送一个 DELETE 请求，访问华为云服务端提供的 AXB模式解绑接口，对指定的X号码进行解绑操作。主要包含以下步骤： 1. 构造请求地址，包括实际请求地址和请求参数。 2. 构造请求头，...

#!/usr/local/bin/python3 # -*- coding: utf-8 -*- # @Time : 2023/6/11 14:19 # @Author : 志在星空 # @File : jietu12.py # @Software: PyCharm import base64 import urllib import requests import json API_KEY = "jHxdMDf2bhEPxcG6zFoRUejo" SECRET_KEY = "QF5GO9eLQncG2Zr1MKrxLOWvKAkAtVfI" def main(): # 发送 OCR 请求并获取响应 url = "https://aip.baidubce.com/rest/2.0/ocr/v1/general?access_token=" + get_access_token() # payload = 'image=%2F9j%2F4AAQ' headers = { 'Content-Type': 'application/x-www-form-urlencoded', 'Accept': 'application/json' } image_path = "11.jpg" with open(image_path, "rb") as image_file: image_data = image_file.read() response = requests.post(url, headers=headers, data=image_data) # response = requests.request("POST", url, headers=headers, data=payload) # 解析响应并提取 words_result 中的所有 words 字段 result = json.loads(response.text) print(result) # words_list = [item['words'] for item in result['words_result']] # print(words_list) # # # 打印所有提取到的内容 # for words in words_list: # print(words) def get_file_content_as_base64(path, urlencoded=False): """ 获取文件base64编码 :param path: 文件路径 :param urlencoded: 是否对结果进行urlencoded :return: base64编码信息 """ with open(path, "rb") as f: content = base64.b64encode(f.read()).decode("utf8") if urlencoded: content = urllib.parse.quote_plus(content) return content def get_access_token(): """ 使用 AK，SK 生成鉴权签名（Access Token） :return: access_token，或是None(如果错误) """ url = "https://aip.baidubce.com/oauth/2.0/token" params = {"grant_type": "client_credentials", "client_id": API_KEY, "client_secret": SECRET_KEY} return str(requests.post(url, params=params).json().get("access_token")) if __name__ == '__main__': main()运行出现{'log_id': 1667825949995168966, 'error_msg': 'param image not exist', 'error_code': 216101}，请修改一下

根据你提供的代码和运行结果，可能是以下两个问题导致的： 1. 请求头中的 Content-Type 不正确，应该设置为 multipart/form-data，而不是 application/x-www-form-urlencoded。 2. 请求体中的数据格式不正确，应该...

#!/usr/local/bin/python3 # -*- coding: utf-8 -*- # @Time : 2023/6/11 14:19 # @Author : 志在星空 # @File : jietu12.py # @Software: PyCharm import base64 import urllib import requests import json API_KEY = "jHxdMDf2bhEPxcG6zFoRUejo" SECRET_KEY = "QF5GO9eLQncG2Zr1MKrxLOWvKAkAtVfI" def main(): # 发送 OCR 请求并获取响应 url = "https://aip.baidubce.com/rest/2.0/ocr/v1/general?access_token=" + get_access_token() # payload = 'image=%2F9j%2F4AAQ' headers = { 'Content-Type': 'application/x-www-form-urlencoded', 'Accept': 'application/json' } image_path = "11.jpg" with open(image_path, "rb") as image_file: image_data = image_file.read() response = requests.post(url, headers=headers, data=image_data) # response = requests.request("POST", url, headers=headers, data=payload) # 解析响应并提取 words_result 中的所有 words 字段 result = json.loads(response.text) print(result) # words_list = [item['words'] for item in result['words_result']] # print(words_list) # # # 打印所有提取到的内容 # for words in words_list: # print(words) def get_file_content_as_base64(path, urlencoded=False): """ 获取文件base64编码 :param path: 文件路径 :param urlencoded: 是否对结果进行urlencoded :return: base64编码信息 """ with open(path, "rb") as f: content = base64.b64encode(f.read()).decode("utf8") if urlencoded: content = urllib.parse.quote_plus(content) return content def get_access_token(): """ 使用 AK，SK 生成鉴权签名（Access Token） :return: access_token，或是None(如果错误) """ url = "https://aip.baidubce.com/oauth/2.0/token" params = {"grant_type": "client_credentials", "client_id": API_KEY, "client_secret": SECRET_KEY} return str(requests.post(url, params=params).json().get("access_token")) if __name__ == '__main__': main()运行出现{'log_id': 1667825949995168966, 'error_msg': 'param image not exist', 'error_code': 216101}，怎么办

请确保文件路径是正确的，并且文件名和扩展名的大小写与实际文件匹配。如果文件名或路径错误，将导致API无法找到该文件并出现上述错误。如果你确定文件路径和文件名正确无误，那么你可以检查一下API文档以确定API对...

python爬虫爬取贴吧图片

# coding:utf-8 import urllib import re def get_html(url): page = urllib.request.urlopen(url) html = page.read() return html def get_image(html_code): reg = r'src="(.+?\.jpg)" width' ...

Python中使用urllib2防止302跳转的代码例子

下面代码将可以做到避免302重定向到新的网页 #!/usr/bin/python # -*- coding: utf-8 -*- #encoding=utf-8 #Filename:states_code.py import urllib2 class RedirctHandler(urllib2.HTTPRedirectHandler): ...

Python从ZabbixAPI获取信息及实现Zabbix-API 监控的方法

import urllib.request, urllib.error, urllib.parse class ZabbixAPI: def __init__(self): self.__url = 'http://192.168.56.102/zabbix/api_jsonrpc.php' self.__user = 'admin' self.__password = 'zabbix' ...

Python爬虫 urllib2的使用方法详解

所谓网页抓取，就是把URL地址中指定的网络资源从网络流中读取出来，保存到本地。在Python中有很多库可以用来抓取网页，我们先学习urllib2。...#-*- coding:utf-8 -*- #01.urllib2_urlopen.py #导入urllib2库 import

python网页请求urllib2模块简单封装代码

#!/usr/bin/python #coding: utf-8 import base64 import urllib import urllib2 import time class SendRequest: ''' This class use to set and request the http, and get the info of response. e.g. set ...

python使用urllib2提交http post请求的方法

#coding=utf-8 import urllib import urllib2 def post(url, data): req = urllib2.Request(url) data = urllib.urlencode(data) #enable cookie opener = urllib2.build_opener(urllib2.HTTPCookieProcessor())...

python3使用urllib示例取googletranslate(谷歌翻译)

#!/usr/bin/env python3 # -*- coding: utf-8 -*- # File Name : gt1.py # Purpose : # Creation Date : 1390366260 # Last Modified : Wed 22 Jan 2014 06:14:11 PM CST # Release By : Doom.zhou import urllib.request...

Python使用urllib模块对URL网址中的中文编码与解码实例详解

URL网址链接中的中文编码说明中文的gbk(GB2312)编码：一个汉字对应两组%xx，即%xx%xx 中文的UTF-8编码：一个汉字对应三组%xx，即%xx%xx%xx ...from urllib.request import quote, unquote # 编码 url1 =

相关推荐

爬取妹子壁纸.zip

详解Python3网络爬虫(二)：利用urllib.urlopen向有道翻译发送数据获得翻译结果

python-poster:数组发布支持的python海报

怎么使用utf-8编码

python爬虫爬取贴吧图片

Python中使用urllib2防止302跳转的代码例子

Python从ZabbixAPI获取信息及实现Zabbix-API 监控的方法

Python爬虫 urllib2的使用方法详解

python网页请求urllib2模块简单封装代码

python使用urllib2提交http post请求的方法

python3使用urllib示例取googletranslate(谷歌翻译)

Python使用urllib模块对URL网址中的中文编码与解码实例详解

最新推荐

IEEE Std 802.15.4z-2020 IEEE Standard（原版非图片）.pdf

图解系统-暗黑风格-小林coding-v1.0.pdf

Google已经推出了Google VR SDK，

zigbee-cluster-library-specification

管理建模和仿真的文件

实现实时数据湖架构：Kafka与Hive集成

list根据id查询pid 然后依次获取到所有的子节点数据

JSBSim Reference Manual

"互动学习：行动中的多样性与论文攻读经历"

实现实时监控告警系统：Kafka与Grafana整合