import requests from bs4 import BeautifulSoup import os # 设置需要爬取的页面数量 total_pages = 250 # 遍历爬取页面 for i in range(1, total_pages + 1): url = f"https://www.bernama.com/en/general/news.php?page={i}" res = requests.get(url) soup = BeautifulSoup(res.text, "html.parser") # 获取需要写入的文本内容 text = soup.find("div",{"class": "col-12 col-sm-12 col-md-12 col-lg-8"}).text # 保存到txt文件 filename = f"{i}.txt" with open(filename, "w", encoding="utf-8") as f: f.write(text) print(f"Page {i} saved successfully.") print("All pages saved successfully.")该代码出现错误,'NoneType' object has no attribute 'text',请修改并保证能够爬取数据和运行
时间: 2024-03-11 18:44:21 浏览: 111
import reimport requestsfrom bs4 import BeautifulSoupimport t
出现 `'NoneType' object has no attribute 'text'` 错误,是因为 `soup.find()` 没有找到对应的元素,返回了 `None`;而 `None` 没有 `text` 属性,因此访问 `.text` 时会抛出该异常。
为了避免出现这种错误,您可以在访问 `.text` 之前先判断 `.find()` 的返回值是否为 `None`,如果是 `None`(即元素不存在)则跳过当前页面的爬取。以下是修改后的代码:
```python
import requests
from bs4 import BeautifulSoup
import os
# Crawl the paginated news listing and save each page's main text to
# "<page number>.txt" in the current working directory.

# Number of listing pages to crawl.
total_pages = 250
# Seconds to wait for the server; without a timeout a stalled connection
# would block the whole crawl indefinitely.
request_timeout = 10
# Count of pages actually written, so the final summary is accurate.
saved_pages = 0

# Visit every page in turn.
for i in range(1, total_pages + 1):
    url = f"https://www.bernama.com/en/general/news.php?page={i}"
    try:
        res = requests.get(url, timeout=request_timeout)
        # Treat 4xx/5xx answers as failures instead of parsing an error page.
        res.raise_for_status()
    except requests.RequestException as err:
        # One bad page must not abort the remaining pages.
        print(f"Page {i} request failed ({err}), skipped.")
        continue

    soup = BeautifulSoup(res.text, "html.parser")

    # Locate the container whose text is written out.
    div = soup.find("div", {"class": "col-12 col-sm-12 col-md-12 col-lg-8"})
    if div is None:
        # find() returns None when nothing matches; calling .text on it is
        # what raised "'NoneType' object has no attribute 'text'".
        print(f"Page {i} has no data, skipped.")
        continue
    text = div.text

    # Save the extracted text to a txt file named after the page number.
    filename = f"{i}.txt"
    with open(filename, "w", encoding="utf-8") as f:
        f.write(text)
    saved_pages += 1
    print(f"Page {i} saved successfully.")

# Report the real outcome; some pages may have been skipped above.
print(f"Finished: {saved_pages} of {total_pages} pages saved.")
```
这段代码先判断 `div` 是否存在,如果不存在则跳过当前页面的爬取,并输出提示信息。如果找到了对应的元素,则继续执行后续的保存文本操作。
阅读全文