删去下列代码中info1_list的html标签import requests from bs4 import BeautifulSoup import matplotlib.pyplot as plt import numpy as np headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.41'} url='https://www.ibiquges.com/xiaoshuodaquan/' strhtml=requests.get(url,headers=headers) soup=BeautifulSoup(strhtml.text,'lxml') info_list = []#书籍目录 count_list = []#书籍数量 info1_list = [] for i in range(1, 14, 2): info = soup.select(f'#main > div:nth-child({i}) > ul') info1 = soup.select(f'#main > div:nth-child({i}) > h2') for item in info: #print(item.get_text()) count = len(info[0].find_all('li')) count_list.append(count) info_list.append(info) for item1 in info1: print(item1.get_text()) info1_list.append(info1) print(info1_list) print(count_list)
时间: 2024-04-03 20:36:25 浏览: 106
可以使用 BeautifulSoup 库中的 get_text() 方法提取标签内的纯文本,从而去掉 HTML 标签。修改后的代码如下:
```python
import requests
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
import numpy as np
# Scrape the novel index page and, for every queried section of #main,
# collect the heading text, the plain-text book listing and the number of
# books in that listing.

# Browser-like User-Agent sent with the request.
# NOTE(review): presumably the site rejects the default client UA -- confirm.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.41'}
url = 'https://www.ibiquges.com/xiaoshuodaquan/'

# A timeout keeps the script from hanging forever on an unresponsive server.
strhtml = requests.get(url, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of silently parsing an error page.
strhtml.raise_for_status()
soup = BeautifulSoup(strhtml.text, 'lxml')

info_list = []   # book listings: plain text of each matched <ul>
count_list = []  # book counts: number of <li> items in each matched <ul>
info1_list = []  # section headings: plain text of each matched <h2>

# Only the odd-numbered children (1, 3, ..., 13) of #main are queried.
for i in range(1, 14, 2):
    info = soup.select(f'#main > div:nth-child({i}) > ul')
    info1 = soup.select(f'#main > div:nth-child({i}) > h2')

    for item in info:
        # Count the entries of the <ul> currently being visited; indexing
        # info[0] here would repeat the first list's count for every match.
        count = len(item.find_all('li'))
        count_list.append(count)
        # get_text() returns the text content without any HTML tags.
        info_list.append(item.get_text())

    for item1 in info1:
        heading = item1.get_text()
        print(heading)
        info1_list.append(heading)

print(info1_list)
print(count_list)
```
这样,info1_list(以及 info_list)中保存的就是去掉 HTML 标签后的纯文本了。
阅读全文