import urllib3 import pandas as pd import os def download_content(url): http = urllib3.PoolManager() response = http.request("GET", url) response_data = response.data html_content = response_data.decode() return html_content def save_excel(): html_content = download_content("http://fx.cmbchina.com/Hq/") cmb_table_list = pd.read_html(html_content) cmb_table_list[1].to_excel("Bit&Yanan.xlsx") def main(): save_excel() if __name__ == '__main__': main() os.startfile("Bit&Yanan.xlsx") import pandas as pd import matplotlib.pyplot as plt import numpy as np plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签 plt.rcParams['axes.unicode_minus'] = False # 用来正常显示负号 rapx = (114.4936096 - 112.6832583)/50 rapy = (23.87839806 - 22.49308313)/50 minx = 112.6832583+rapx*2 maxx = 114.4936096+rapx*2 miny = 22.49308313 maxy = 23.87839806 data = pd.read_excel("Bit&Yanan.xlsx") print(data.head()),继续完成用
时间: 2023-06-26 22:08:56 浏览: 160
pandas和matplotlib库分析和展示数据的任务。
首先,我们可以查看一下读入数据后的前几行,使用`.head()`函数,例如`print(data.head())`。
接下来,你可以使用pandas库进行数据的筛选和处理,例如选取特定列、按照某列进行排序、按照某列筛选数据等等。具体使用方法可以参考pandas的官方文档或者各种在线教程。
最后,你可以使用matplotlib库绘制各种图表,例如折线图、散点图、柱状图等等,来展示数据的规律和趋势。同样的,matplotlib库的使用方法可以参考官方文档或者各种在线教程。需要注意的是,matplotlib库的绘图需要一定的数学基础和美学素养,因此建议在实际操作中结合各种案例和实践经验,不断提高自己的绘图技能。
相关问题
帮我找一下这段代码的错误,并改正import urllib.parse import urllib.request def create_request(page): base_url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&' data = { 'start':(page - 1) * 20, 'limit':20 } data = urllib.parse.urlencode(data) url = base_url + data # print(url) headers = { # 'User - Agent': 'Mozilla / 5.0(Windows NT 10.0;WOW64) AppleWebKit / 537.36(KHTML, likeGecko) Chrome / 108.0.5359.95Safari / 537.36' 'User - Agent': 'Mozilla / 5.0(Windows NT 10.0;WOW64) AppleWebKit / 537.36(KHTML, likeGecko) Chrome / 108.0.5359.95Safari / 537.36' } request = urllib.request.Request(url=url,headers=headers) return request def get_content(request): response = urllib.request.urlopen(request) content = response.read().decode('utf-8') return content def down_load(page,content): with open ('douban_' + str(page) + '.json','w',encoding='utf-8') as fp: fp.write(content) if __name__ == '__main__': start_page = int (input('请输入开始的页码')) end_page = int (input('请输入结束的页码')) for page in range(start_page,end_page+1): # 每一页都有请求对象的定制 request = create_request(page) content = get_content(request) down_load(page,content)
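该代码主要有以下问题:
1. headers 中的键名 `'User - Agent'` 含有多余空格,应写成 `'User-Agent'`;其值中的多余空格(如 `Mozilla / 5.0`、`Chrome / 108.0.5359.95Safari / 537.36`)也应一并去掉,否则服务器无法正确识别该请求头。
2. `down_load` 本身是合法的函数名,且定义与调用保持一致,并不会导致报错;下面将其改名为 `download` 只是命名风格上的调整,并非必须的修正。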
改正后的代码如下:
```python
import urllib.parse
import urllib.request
def create_request(page):
    """Build the request object for one page of the Douban chart top list.

    Args:
        page: 1-based page number. Each page holds 20 entries, so page ``n``
            starts at offset ``(n - 1) * 20``.

    Returns:
        A ``urllib.request.Request`` for the paged URL, carrying a
        browser-like ``User-Agent`` header.
    """
    # The base URL already ends with '&', so the encoded query is appended as-is.
    base_url = 'https://movie.douban.com/j/chart/top_list?type=5&interval_id=100%3A90&action=&'
    query = urllib.parse.urlencode({
        'start': (page - 1) * 20,
        'limit': 20,
    })
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.5359.95 Safari/537.36'
    }
    return urllib.request.Request(url=base_url + query, headers=headers)
def get_content(request):
    """Send *request* and return the response body decoded as UTF-8 text.

    Args:
        request: A URL string or ``urllib.request.Request`` accepted by
            ``urllib.request.urlopen``.

    Returns:
        The full response body as ``str``.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` on failure.
        UnicodeDecodeError: If the body is not valid UTF-8.
    """
    # The context manager closes the connection even if read/decode fails.
    with urllib.request.urlopen(request) as response:
        return response.read().decode('utf-8')
def download(page, content):
    """Write *content* to ``douban_<page>.json`` in the current directory.

    Args:
        page: Page number (or any value) used to build the file name.
        content: Text to store; it is written with UTF-8 encoding.
    """
    file_name = 'douban_' + str(page) + '.json'
    # Explicit UTF-8 keeps non-ASCII text intact regardless of the OS default.
    with open(file_name, 'w', encoding='utf-8') as fp:
        fp.write(content)
if __name__ == '__main__':
    # Script entry point: fetch and save every page in the requested range.
    start_page = int(input('请输入开始的页码'))
    end_page = int(input('请输入结束的页码'))
    # Both bounds are inclusive, hence end_page + 1.
    for page in range(start_page, end_page + 1):
        # Every page needs its own request object.
        request = create_request(page)
        content = get_content(request)
        download(page, content)
```
import urllib.request as urllib2from bs4 import BeautifulSoup as bsfrom urllib.error import URLError, HTTPError, ContentTooShortErrora = []def download(url): print("download..."+url) try: html = urllib2.urlopen(url) except(URLError, HTTPError, ContentTooShortError) as e: print("download error:"+e.reason) html = None return htmldef parser_html(response): soup = bs(response, 'html.parser') return soup# 打印信息def out_information(soup): # 记录爬取了多少数据 indexSpan = 1 # 打印出title标签的内容 print(soup.title.string) # 将a标签的信息存储在a列表里 for link in soup.find_all('a'): a.append(str(indexSpan) + link.text) indexSpan += 1 # 打印出a列表的内容 print(a)if __name__ == "__main__": # 需要访问的url url = "https://www.baidu.com/" # 下载url response = download(url) # 解析网页 soup = parser_html(response) # 打印内容 out_information(soup)
这段代码尝试从指定的URL下载页面,然后使用BeautifulSoup来解析页面内容,最后打印出页面的title标签和所有链接(a标签)的文本内容。
不过,这段代码有几个问题:
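1. 原代码的各条语句之间缺少换行(例如 `import urllib.request as urllib2` 与后面的 `from bs4 import ...` 连在了一起),需要让每条语句单独成行并恢复缩进,否则会出现语法错误。导入语句 `import urllib.request as urllib2` 本身的写法是正确的。
2. 在 `except` 语句中,`e.reason` 可能是字符串,也可能是一个异常对象;当它不是字符串时,`"download error:" + e.reason` 会抛出 `TypeError`。改用 `str(e)`(或 `str(e.reason)`)即可安全地拼接。不建议使用 `e.reason.strerror`,因为当 `e.reason` 是字符串时它没有该属性。
3. 列表 `a` 已经在模块顶层通过 `a = []` 初始化,`out_information` 函数可以直接使用,无需额外修改。需要注意的是,下载失败时 `download` 会返回 `None`,后续解析前最好先判断一下。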
下面是修改后的代码:
```
import urllib.request as urllib2
from bs4 import BeautifulSoup as bs
from urllib.error import URLError, HTTPError, ContentTooShortError
# Module-level list that out_information() appends the numbered link texts to.
a = []
def download(url):
    """Open *url* and return the response object, or ``None`` on failure.

    Args:
        url: URL string to open.

    Returns:
        The object returned by ``urlopen`` on success; ``None`` when a
        ``URLError`` (or one of its subclasses) is raised, after printing
        the error.
    """
    print("download..."+url)
    try:
        html = urllib2.urlopen(url)
    except (URLError, HTTPError, ContentTooShortError) as e:
        # str(e) is safe whether e.reason is a string or a nested exception.
        print("download error:" + str(e))
        html = None
    return html
def parser_html(response):
    """Parse *response* with BeautifulSoup's ``html.parser`` backend.

    Args:
        response: Markup (or a file-like object yielding markup) to parse,
            e.g. the value returned by ``download``.

    Returns:
        The resulting ``BeautifulSoup`` object.
    """
    soup = bs(response, 'html.parser')
    return soup
# Print the scraped information
def out_information(soup):
    """Print the page title and collect the text of every ``<a>`` tag.

    Each link text is prefixed with its 1-based position and appended to the
    module-level list ``a``, which is printed afterwards.

    Args:
        soup: Parsed document exposing ``title`` and ``find_all``.
    """
    # Print the content of the title tag; tolerate a page without <title>
    # instead of failing with AttributeError on None.
    title = soup.title
    print(title.string if title is not None else None)
    # Store the numbered text of each a tag in the list a
    for index, link in enumerate(soup.find_all('a'), start=1):
        a.append(str(index) + link.text)
    # Print the content of the list a
    print(a)
if __name__ == "__main__":
    # URL to visit
    url = "https://www.baidu.com/"
    # Download the url
    response = download(url)
    # download() returns None after printing the error, so only parse and
    # print when a response was actually obtained.
    if response is not None:
        # Parse the page
        soup = parser_html(response)
        # Print the content
        out_information(soup)
```
阅读全文