代码运行报错,请帮我分析一下原因 import urllib.request import re import pandas as pd import time def getdata(url): req = urllib.request.Request(url) req.add_header('User-Agent', ' Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36') data = urllib.request.urlopen(req).read().decode('utf-8') str1 = str(data) pat = '''<tr> <td>(.?)</td> <td>(.?)</td> <td>(.?)</td> <td>(.?)(.?</td> <td>.?</td> </tr>''' result = re.compile(pat).findall(str1) return result if name == 'main': for i in range(1968, 2020): print('正在收集第%d年数据' % i) rank = [] country = [] zhou = [] total = [] url = 'https://www.kuaiyilicai.com/stats/global/yearly/g_gdp/' + str(i) + '.html' data = getdata(url) for j in range(0, len(data)): rank.append(data[j][0]) country.append(data[j][1]) zhou.append(data[j][2]) total.append(data[j][3]) dataframe = pd.DataFrame({'排名': rank, '国家/地区': country, '所在洲': zhou, 'GDP(美元计)': total}) dataframe.to_csv(str(i) + "年世界gdp排名.csv", index=False, sep=',', encoding="utf_8_sig", mode="a+") print(i, '年数据收集完成') time.sleep(2)
时间: 2024-04-28 22:23:46 浏览: 10
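这段代码有三处问题。第一,入口判断写成了 `if name == 'main':`,而 `name` 变量并未定义,运行时会抛出 NameError;正确写法是 `if __name__ == '__main__':`。第二,正则表达式中第四个单元格的括号不匹配(`(.?)(.?` 多了一个未闭合的左括号),编译正则时会报错,需要将第四个括号和第五个括号合并为一个捕获组;同时各捕获组里的 `.?` 最多只能匹配一个字符,应改为非贪婪的 `.*?`。第三,`mode="a+"` 会以追加方式写入,重复运行脚本时同一年份的 CSV 中会出现重复的表头和数据;由于每个年份写入各自的文件,改为在循环内用 `with open(..., mode="w")` 覆盖写入即可,`with` 语句会在写入完成后自动关闭文件。以下是修改后的代码: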
```
import urllib.request
import re
import pandas as pd
import time
# One table row with five cells. The first four cells are captured; the fifth
# is matched (but not captured) only so that the row's closing tag lines up.
# ``\s*`` between tags tolerates any spacing or line breaks in the markup.
# ``.`` does not match a newline here, so each cell must sit on a single line.
_ROW_PATTERN = re.compile(
    r'<tr>\s*'
    r'<td>(.*?)</td>\s*'
    r'<td>(.*?)</td>\s*'
    r'<td>(.*?)</td>\s*'
    r'<td>(.*?)</td>\s*'
    r'<td>.*?</td>\s*'
    r'</tr>'
)


def getdata(url, timeout=30):
    """Download *url* and return the table rows found in the page.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait on the connection before giving up.

    Returns:
        A list with one 4-tuple of strings per matched ``<tr>``: the raw
        contents of its first four ``<td>`` cells, in document order. The
        list is empty when no row matches.

    Raises:
        urllib.error.URLError: If the page cannot be retrieved.
        UnicodeDecodeError: If the response body is not valid UTF-8.
    """
    req = urllib.request.Request(url)
    req.add_header('User-Agent', ' Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/74.0.3729.169 Safari/537.36')
    # The context manager closes the response even if reading or decoding fails.
    with urllib.request.urlopen(req, timeout=timeout) as response:
        html = response.read().decode('utf-8')
    return _ROW_PATTERN.findall(html)
def _save_year_ranking(year):
    """Fetch the GDP ranking page for *year* and write it to a CSV file.

    The file is named ``<year>年世界gdp排名.csv`` in the current directory and
    is overwritten if it already exists.
    """
    print('正在收集第%d年数据' % year)
    url = 'https://www.kuaiyilicai.com/stats/global/yearly/g_gdp/' + str(year) + '.html'
    rows = getdata(url)
    # Only the first four fields of each row are used as columns.
    dataframe = pd.DataFrame(
        [row[:4] for row in rows],
        columns=['排名', '国家/地区', '所在洲', 'GDP(美元计)'],
    )
    # Hand pandas the path rather than an open text handle: pandas then opens
    # the file with newline='' itself, which avoids blank lines between rows
    # on Windows. utf_8_sig writes a BOM at the start of the file.
    dataframe.to_csv(f"{year}年世界gdp排名.csv", index=False, sep=',', encoding="utf_8_sig")
    print(year, '年数据收集完成')


if __name__ == '__main__':
    for year in range(1968, 2020):
        _save_year_ranking(year)
        # Pause between requests.
        time.sleep(2)
```
请注意,此代码仅供参考,具体实现方式需要根据实际情况进行调整。