帮我完成这四个步骤。步骤1:从网络上获取网页内容 getHTMLText();步骤2:提取网页中的数据信息到合适的数据结构 fillUnivList();步骤3:将数据结构内容写入文件 writeUlistfile();步骤4:利用数据结构内容输出结果 printUniverList()。
时间: 2023-05-24 20:06:41 浏览: 89
步骤1:从网络上获取网页内容getHTMLText()
```python
import requests
def getHTMLText(url):
    """Download *url* and return the response body as text.

    The request uses a 30 second timeout. The text is decoded with the
    encoding that ``requests`` detects from the content
    (``apparent_encoding``) rather than the one declared in the headers.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection error, timeout, or a non-2xx status code).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: network/HTTP failures yield "" as before, but
        # unrelated errors (KeyboardInterrupt, typos, ...) are no longer hidden.
        return ""
```
步骤2:提取网页中的数据信息到合适的数据结构fillUnivList()
```python
from bs4 import BeautifulSoup
def fillUnivList(html, ulist):
    """Parse the first ``<tbody>`` in *html* and append one entry per row to *ulist*.

    Each appended entry is a three-item list holding the ``.string`` of the
    row's 1st, 2nd and 4th ``<td>`` cells (presumably rank, school name and
    total score, matching the header printed by ``printUniverList``).

    Args:
        html: Page markup; may be empty.
        ulist: List that is extended in place.

    Returns:
        None. When the markup contains no ``<tbody>``, *ulist* is left untouched.
    """
    # Imported locally because the surrounding file only imports
    # ``BeautifulSoup``; the bare name ``bs4`` used by the original
    # ``isinstance`` check was never bound and raised NameError.
    from bs4.element import Tag

    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        # Empty or unexpected page (e.g. the download failed): nothing to parse.
        return
    for tr in tbody.children:
        if not isinstance(tr, Tag):
            continue  # skip the text nodes that sit between rows
        tds = tr('td')
        if len(tds) < 4:
            continue  # row does not have the columns read below
        # NOTE(review): ``.string`` is None for a cell that contains nested
        # markup with several children — confirm the target table has plain cells.
        ulist.append([tds[0].string, tds[1].string, tds[3].string])
```
步骤3:将数据结构内容写入文件writeUlistfile()
```python
def writeUlistfile(ulist):
    """Write the rows of *ulist* to ``univlist.txt`` in the current directory.

    The file is opened in write mode (any existing content is replaced) with
    UTF-8 encoding. Each row becomes one line made of its first three items,
    each left-aligned to a minimum width of 10 and separated by tabs.

    Args:
        ulist: Iterable of indexable rows with at least three items each.
    """
    row_format = '{:<10}\t{:<10}\t{:<10}\n'
    with open('univlist.txt', 'w', encoding='utf-8') as f:
        # Iterate the rows directly instead of indexing through range(len(...)),
        # and hand the formatted lines to the file in a single call.
        f.writelines(row_format.format(row[0], row[1], row[2]) for row in ulist)
```
步骤4:利用数据结构内容输出结果printUniverList()
```python
def printUniverList(ulist, num):
    """Print a header line followed by at most *num* rows of *ulist*.

    Every line has three centred columns separated by tabs (minimum widths
    10, 6 and 10), filled from the first three items of each row.

    Args:
        ulist: List of indexable rows with at least three items each.
        num: Maximum number of rows to print. Values larger than
            ``len(ulist)`` print every available row; zero or negative
            values print only the header.
    """
    row_format = "{:^10}\t{:^6}\t{:^10}"
    print(row_format.format("排名", "学校名称", "总分"))
    # Slice rather than index through range(num): asking for more rows than
    # were scraped no longer raises IndexError. max() keeps a negative num
    # from turning into an "all but the last" slice.
    for u in ulist[:max(num, 0)]:
        print(row_format.format(u[0], u[1], u[2]))
```
完整代码:
```python
import requests
from bs4 import BeautifulSoup
def getHTMLText(url):
    """Download *url* and return the response body as text.

    The request uses a 30 second timeout. The text is decoded with the
    encoding that ``requests`` detects from the content
    (``apparent_encoding``) rather than the one declared in the headers.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection error, timeout, or a non-2xx status code).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Best-effort fetch: network/HTTP failures yield "" as before, but
        # unrelated errors (KeyboardInterrupt, typos, ...) are no longer hidden.
        return ""
def fillUnivList(html, ulist):
    """Parse the first ``<tbody>`` in *html* and append one entry per row to *ulist*.

    Each appended entry is a three-item list holding the ``.string`` of the
    row's 1st, 2nd and 4th ``<td>`` cells (presumably rank, school name and
    total score, matching the header printed by ``printUniverList``).

    Args:
        html: Page markup; may be empty.
        ulist: List that is extended in place.

    Returns:
        None. When the markup contains no ``<tbody>``, *ulist* is left untouched.
    """
    # Imported locally because the surrounding file only imports
    # ``BeautifulSoup``; the bare name ``bs4`` used by the original
    # ``isinstance`` check was never bound and raised NameError.
    from bs4.element import Tag

    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        # Empty or unexpected page (e.g. the download failed): nothing to parse.
        return
    for tr in tbody.children:
        if not isinstance(tr, Tag):
            continue  # skip the text nodes that sit between rows
        tds = tr('td')
        if len(tds) < 4:
            continue  # row does not have the columns read below
        # NOTE(review): ``.string`` is None for a cell that contains nested
        # markup with several children — confirm the target table has plain cells.
        ulist.append([tds[0].string, tds[1].string, tds[3].string])
def writeUlistfile(ulist):
    """Write the rows of *ulist* to ``univlist.txt`` in the current directory.

    The file is opened in write mode (any existing content is replaced) with
    UTF-8 encoding. Each row becomes one line made of its first three items,
    each left-aligned to a minimum width of 10 and separated by tabs.

    Args:
        ulist: Iterable of indexable rows with at least three items each.
    """
    row_format = '{:<10}\t{:<10}\t{:<10}\n'
    with open('univlist.txt', 'w', encoding='utf-8') as f:
        # Iterate the rows directly instead of indexing through range(len(...)),
        # and hand the formatted lines to the file in a single call.
        f.writelines(row_format.format(row[0], row[1], row[2]) for row in ulist)
def printUniverList(ulist, num):
    """Print a header line followed by at most *num* rows of *ulist*.

    Every line has three centred columns separated by tabs (minimum widths
    10, 6 and 10), filled from the first three items of each row.

    Args:
        ulist: List of indexable rows with at least three items each.
        num: Maximum number of rows to print. Values larger than
            ``len(ulist)`` print every available row; zero or negative
            values print only the header.
    """
    row_format = "{:^10}\t{:^6}\t{:^10}"
    print(row_format.format("排名", "学校名称", "总分"))
    # Slice rather than index through range(num): asking for more rows than
    # were scraped no longer raises IndexError. max() keeps a negative num
    # from turning into an "all but the last" slice.
    for u in ulist[:max(num, 0)]:
        print(row_format.format(u[0], u[1], u[2]))
def main():
    """Run the four steps in order: fetch, parse, save to file, print.

    The page at the hard-coded ranking URL is downloaded, its table rows are
    collected into a list, the list is written out by ``writeUlistfile`` and
    the first 20 rows are printed by ``printUniverList``.
    """
    ranking_url = 'http://www.zuihaodaxue.com/zuihaodaxuepaiming2017.html'
    universities = []
    page = getHTMLText(ranking_url)
    fillUnivList(page, universities)
    writeUlistfile(universities)
    printUniverList(universities, 20)  # first 20 universities
# Module-level entry point: runs the whole pipeline as soon as this code executes.
# NOTE(review): there is no `if __name__ == "__main__":` guard, so importing this
# module would also call main() — confirm that is intended.
main()
```
阅读全文