删去下列代码中info1_list的html标签import requests from bs4 import BeautifulSoup import matplotlib.pyplot as plt import numpy as np headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.41'} url='https://www.ibiquges.com/xiaoshuodaquan/' strhtml=requests.get(url,headers=headers) soup=BeautifulSoup(strhtml.text,'lxml') info_list = []#书籍目录 count_list = []#书籍数量 info1_list = [] for i in range(1, 14, 2): info = soup.select(f'#main > div:nth-child({i}) > ul') info1 = soup.select(f'#main > div:nth-child({i}) > h2') for item in info: #print(item.get_text()) count = len(info[0].find_all('li')) count_list.append(count) info_list.append(info) for item1 in info1: print(item1.get_text()) info1_list.append(info1) print(info1_list) print(count_list)
时间: 2024-04-04 12:30:03 浏览: 300
可以使用BeautifulSoup库中的get_text()方法来删除info1_list中的HTML标签,代码如下:
import requests from bs4 import BeautifulSoup import matplotlib.pyplot as plt import numpy as np headers={'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36 Edg/114.0.1823.41'} url='https://www.ibiquges.com/xiaoshuodaquan/' strhtml=requests.get(url,headers=headers) soup=BeautifulSoup(strhtml.text,'lxml') info_list = []#书籍目录 count_list = []#书籍数量 info1_list = [] for i in range(1, 14, 2): info = soup.select(f'#main > div:nth-child({i}) > ul') info1 = soup.select(f'#main > div:nth-child({i}) > h2') for item in info: #print(item.get_text()) count = len(info[0].find_all('li')) count_list.append(count) info_list.append(info) for item1 in info1: print(item1.get_text()) info1_list.append(item1.get_text()) print(info1_list) print(count_list)
阅读全文