File "F:\pyproject\demo2.py", line 25, in get_info phone = soup.find('span',class_='index_detail-tel__fgpsE').text ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ AttributeError: 'NoneType' object has no attribute 'text'的问题

这个错误通常表示在使用 .text 属性时，对象为 NoneType，也就是在 BeautifulSoup 中没有找到对应的标签或属性。你需要检查一下代码中的 soup.find() 方法是否正确地定位到了你需要的标签和属性，如果没有找到，可以尝试使用其他方法或检查一下网页结构是否有变化。另外，也可以在使用 .text 属性前，先通过 if 判断一下对象是否存在，避免抛出 AttributeError 异常。

import requests from bs4 import BeautifulSoup import openpyxl from time import sleep # 设置请求头，模拟浏览器访问 headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'} # 从天眼查获取公司邮箱和电话 def get_info(company): email = '' phone = '' url = 'https://www.tianyancha.com/search?key=' + company r = requests.get(url, headers=headers) soup = BeautifulSoup(r.text, 'html.parser') # try: # 获取公司详情页链接 company_url = soup.find_all('a', class_='index_alink__zcia5 link-click')[0].get('href') r = requests.get(company_url, headers=headers) soup = BeautifulSoup(r.text, 'html.parser') # 获取公司邮箱和电话 email = soup.find_all('span', class_='index_detail-email__B_1Tq')[0].text sleep(0.5) phone = soup.find('span',class_='index_detail-tel__fgpsE').text # except: # pass # return email,phone # 从Excel文件中读取公司名称 def read_from_excel(file_path): wb = openpyxl.load_workbook(file_path) ws = wb.active company_list = [] for row in ws.iter_rows(min_row=2, values_only=True): company_list.append(row[0]) return company_list # 将公司邮箱和电话写入Excel文件 def write_to_excel(company_list): wb = openpyxl.Workbook() ws = wb.active ws.title = 'Company Info' # 写入表头 ws.cell(row=1, column=1, value='Company Name') ws.cell(row=1, column=2, value='Email') ws.cell(row=1, column=3, value='Phone') # 写入数据 for i, company in enumerate(company_list): email,phone = get_info(company) ws.cell(row=i+2, column=1, value=company) ws.cell(row=i+2, column=2, value=email) ws.cell(row=i+2, column=3, value=phone) # 保存Excel文件 wb.save('company_info.xlsx') if name == 'main': file_path = 'company_names.xlsx' company_list = read_from_excel(file_path) write_to_excel(company_list)优化这段代码

这段代码可以进行以下优化： 1. 在函数 get_info 中，使用 try...except 捕获异常会使代码难以维护。可以使用 if...else 判断 soup.find_all() 和 soup.find() 是否返回了有效结果，如果无效则将 email 和 phone 赋值为空字符串。 2. 在函数 write_to_excel 中，每次调用 get_info() 都会发起一个 http 请求，这会导致程序运行速度变慢。可以先将所有公司的详情页链接获取到，再一次性请求，减少 http 请求的次数。下面是优化后的代码： ``` import requests from bs4 import BeautifulSoup import openpyxl from time import sleep # 设置请求头，模拟浏览器访问 headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36' } # 从天眼查获取公司邮箱和电话 def get_info(company, company_urls): email = '' phone = '' url = 'https://www.tianyancha.com/search?key=' + company r = requests.get(url, headers=headers) soup = BeautifulSoup(r.text, 'html.parser') # 获取公司详情页链接 company_url = soup.find_all('a', class_='index_alink__zcia5 link-click') if company_url: company_urls.append(company_url[0].get('href')) # 获取公司邮箱和电话 if company_urls: r = requests.get(company_urls[-1], headers=headers) soup = BeautifulSoup(r.text, 'html.parser') email = soup.find('span', class_='index_detail-email__B_1Tq') phone = soup.find('span', class_='index_detail-tel__fgpsE') if email: email = email.text if phone: phone = phone.text sleep(0.5) return email, phone # 从Excel文件中读取公司名称 def read_from_excel(file_path): wb = openpyxl.load_workbook(file_path) ws = wb.active company_list = [] for row in ws.iter_rows(min_row=2, values_only=True): company_list.append(row[0]) return company_list # 将公司邮箱和电话写入Excel文件 def write_to_excel(company_list): company_urls = [] for company in company_list: email, phone = get_info(company, company_urls) company_urls.pop() row_num = len(company_urls) + 2 ws.cell(row=row_num, column=1, value=company) ws.cell(row=row_num, column=2, value=email) ws.cell(row=row_num, column=3, value=phone) # 保存Excel文件 wb.save('company_info.xlsx') if __name__ == '__main__': file_path = 'company_names.xlsx' company_list = read_from_excel(file_path) wb = openpyxl.Workbook() ws = wb.active ws.title = 'Company Info' # 写入表头 ws.cell(row=1, column=1, value='Company Name') ws.cell(row=1, column=2, value='Email') ws.cell(row=1, column=3, value='Phone') write_to_excel(company_list) ```

import os import csv from bs4 import BeautifulSoup # 设置文件夹路径 folder_path = 'C:/Users/test/Desktop/DIDItest' # 遍历文件夹中的所有文件和子文件夹 for root, dirs, files in os.walk(folder_path): for file_name in files: if file_name.endswith('.html'): file_path = os.path.join(root, file_name) # 打开HTML文件并读取源代码 with open(file_path, 'r', encoding='utf-8') as file: html_content = file.read() # 在这里可以对源代码进行进一步的操作和提取 soup = BeautifulSoup(html_content, 'html.parser') messages = soup.find_all('p') for message in messages: talk_id_tag = message.find_previous('a') if talk_id_tag: talk_id = talk_id_tag.text.strip()[1:] time = message.contents[0].strip().split(',')[0][1:] send_phone = message.find('span', class_='hint-success').text.strip() receive_phone = message.find_all('span', class_='hint-success')[1].text.strip() content_tag = message.find('a') if content_tag: content = content_tag['href'] content_type = '音频' else: content = message.text.strip() content_type = '文本' print("talkid:", talk_id) print("时间:", time) print("发送号码:", send_phone) print("接收号码:", receive_phone) print("发送内容:", content) print()

这段代码有一个缩进问题，最后的`print`语句没有在正确的位置，应该在`for`循环内，以便在每次迭代时打印出提取的信息。请将最后的`print`语句缩进到正确的位置。以下是修改后的代码： ```python import os import csv from bs4 import BeautifulSoup # 设置文件夹路径 folder_path = 'C:/Users/test/Desktop/DIDItest' # 遍历文件夹中的所有文件和子文件夹 for root, dirs, files in os.walk(folder_path): for file_name in files: if file_name.endswith('.html'): file_path = os.path.join(root, file_name) # 打开HTML文件并读取源代码 with open(file_path, 'r', encoding='utf-8') as file: html_content = file.read() # 在这里可以对源代码进行进一步的操作和提取 soup = BeautifulSoup(html_content, 'html.parser') messages = soup.find_all('p') for message in messages: talk_id_tag = message.find_previous('a') if talk_id_tag: talk_id = talk_id_tag.text.strip()[1:] time = message.contents[0].strip().split(',')[0][1:] send_phone = message.find('span', class_='hint-success').text.strip() receive_phone = message.find_all('span', class_='hint-success')[1].text.strip() content_tag = message.find('a') if content_tag: content = content_tag['href'] content_type = '音频' else: content = message.text.strip() content_type = '文本' print("talkid:", talk_id) print("时间:", time) print("发送号码:", send_phone) print("接收号码:", receive_phone) print("发送内容:", content) print() ``` 请注意，这段代码还没有将提取到的信息写入CSV文件，你可以根据需要进一步完善代码来实现写入CSV的功能。

阅读全文

File "F:\pyproject\demo2.py", line 25, in get_info phone = soup.find('span',class_='index_detail-tel__fgpsE').text ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ AttributeError: 'NoneType' object has no attribute 'text'的问题

相关推荐

ISTS工具中的Special-Soup与shell.py用法解析

Python网络爬虫实战：快速抓取subito.it数据

Python开发的战舰游戏：Battleship.py的优化之旅

Traceback (most recent call last): File "D:\Notepad++\b.py", line 57, in <module> playlists = get_playlists(soup) File "D:\Notepad++\b.py", line 25, in get_playlists playlist['create_time'] = li.find('span', {'class': 's-fc3'}).text AttributeError: 'NoneType' object has no attribute 'text'

Traceback (most recent call last): File "D:\Notepad++\b.py", line 66, in <module> playlists = get_playlists(soup) File "D:\Notepad++\b.py", line 39, in get_playlists playlist['song_count'] = li.find('span', {'class': 's-fc3'}).text AttributeError: 'NoneType' object has no attribute 'text'

运行显示下面内容Traceback (most recent call last): File "C:/Users/w/Desktop/1.py", line 7, in <module> content = soup.find('div', class_='lemma-summary').get_text().strip() AttributeError: 'NoneType' object has no attribute 'get_text'

Traceback (most recent call last): File "D:\python项目\main.py", line 10, in <module> win_data = soup.find("div", class_="win_data").text AttributeError: 'NoneType' object has no attribute 'text'

Traceback (most recent call last): File "D:\python项目\main.py", line 9, in <module> home_team = soup.find('div', (span.text)) NameError: name 'span' is not defined

Traceback (most recent call last): File "F:\pythonProject\DaoQushuju.py", line 28, in <module> area = house.find('span', class_='content__list--item--des').text.strip() AttributeError: 'NoneType' object has no attribute 'text'解析

Habbo登录验证工具：checker.py使用方法介绍

全面覆盖：Python 2.x版本插件包指南

大家在看

PAMA机床操作手册_中英文对照

基于Informix+External+Table实现数据快速加载

dosbox:适用于Android的DosBox Turbo FreeBox

PCIE2.0总线规范，用于PCIE开发参考.zip

多邻国语言学习 v5.13.4 for Android 英语、日语、韩语、德语…等30余种语言学习应用 .rar

最新推荐

基于CNN-GRU-Attention混合神经网络的负荷预测方法 附Python代码.rar

,,Induction-Motor-VF-Control：基于MATLAB Simulink的利用V F控制的感应电机调速仿真模型 仿真条件：MATLAB Simulink R2015b ,核心关键

Windows下操作Linux图形界面的VNC工具

【SketchUp Ruby API：从入门到精通】

VMware虚拟机打开虚拟网络编辑器出现由于找不到vnetlib.dll,无法继续执行代码。重新安装程序可能会解决问题

基于Preact的高性能PWA实现定期天气信息更新

从停机到上线，EMC VNX5100控制器SP更换的实战演练

ubuntu labelme中文版安装

全新免费HTML5商业网站模板发布

EMC VNX5100控制器SP更换全流程指南：新手到高手的必备技能

基于CNN-GRU-Attention混合神经网络的负荷预测方法附Python代码.rar

,,Induction-Motor-VF-Control：基于MATLAB Simulink的利用V F控制的感应电机调速仿真模型仿真条件：MATLAB Simulink R2015b ,核心关键