import requests from bs4 import BeautifulSoup import openpyxl from time import sleep # 设置请求头,模拟浏览器访问 headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'} # 从天眼查获取公司邮箱和电话 def get_info(company): email = '' phone = '' url = 'https://www.tianyancha.com/search?key=' + company r = requests.get(url, headers=headers) soup = BeautifulSoup(r.text, 'html.parser') # try: # 获取公司详情页链接 company_url = soup.find_all('a', class_='index_alink__zcia5 link-click')[0].get('href') r = requests.get(company_url, headers=headers) soup = BeautifulSoup(r.text, 'html.parser') # 获取公司邮箱和电话 email = soup.find_all('span', class_='index_detail-email__B_1Tq')[0].text sleep(0.5) phone = soup.find('span',class_='index_detail-tel__fgpsE').text # except: # pass # return email,phone # 从Excel文件中读取公司名称 def read_from_excel(file_path): wb = openpyxl.load_workbook(file_path) ws = wb.active company_list = [] for row in ws.iter_rows(min_row=2, values_only=True): company_list.append(row[0]) return company_list # 将公司邮箱和电话写入Excel文件 def write_to_excel(company_list): wb = openpyxl.Workbook() ws = wb.active ws.title = 'Company Info' # 写入表头 ws.cell(row=1, column=1, value='Company Name') ws.cell(row=1, column=2, value='Email') ws.cell(row=1, column=3, value='Phone') # 写入数据 for i, company in enumerate(company_list): email,phone = get_info(company) ws.cell(row=i+2, column=1, value=company) ws.cell(row=i+2, column=2, value=email) ws.cell(row=i+2, column=3, value=phone) # 保存Excel文件 wb.save('company_info.xlsx') if __name__ == '__main__': file_path = 'company_names.xlsx' company_list = read_from_excel(file_path) write_to_excel(company_list) 此段代码定位天眼查的公司电话失败,帮助我重新写一遍代码,能够正确定位天眼查公司电话邮箱
时间: 2024-04-21 22:27:43 浏览: 188
好的,以下是重新写的代码,其中对定位电话号码的部分进行了修改和完善,同时也加入了异常处理:
```
import logging
from time import sleep
from urllib.parse import urljoin

import openpyxl
import requests
from bs4 import BeautifulSoup
# Request headers sent with every HTTP call; the User-Agent mimics a desktop
# Chrome browser so the request looks like ordinary browser traffic.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36'
}
# Fetch a company's e-mail address and phone number from Tianyancha.
def get_info(company):
    """Return ``(email, phone)`` for *company* as scraped from Tianyancha.

    Searches tianyancha.com for the company name, follows the first search
    hit to its detail page and reads the contact fields from that page.
    The lookup is best-effort: a field that cannot be found, or any HTTP
    failure, yields an empty string for that field instead of an exception.

    Args:
        company: Company name to search for. ``None`` or a blank name
            returns ``('', '')`` without issuing any request.

    Returns:
        A ``(email, phone)`` tuple of strings; either element may be ``''``.
    """
    email = ''
    phone = ''
    if company is None or not str(company).strip():
        return email, phone

    try:
        # Pass the name via ``params`` so requests URL-encodes it; gluing it
        # onto the URL by hand breaks on names containing '&', '#', etc.
        search_resp = requests.get(
            'https://www.tianyancha.com/search',
            params={'key': str(company)},
            headers=headers,
            timeout=10,  # never hang forever on a stalled connection
        )
        search_resp.raise_for_status()
        search_soup = BeautifulSoup(search_resp.text, 'html.parser')

        # First search hit is taken to be the company's detail page.
        # NOTE(review): the CSS class names below depend on Tianyancha's
        # current markup - confirm them against the live page.
        link = search_soup.find('a', class_='name')
        href = link.get('href') if link else None
        if not href:
            logging.getLogger(__name__).warning(
                'No Tianyancha search result for %r', company)
            return email, phone

        # urljoin leaves absolute links untouched and resolves relative ones.
        detail_resp = requests.get(
            urljoin(search_resp.url, href),
            headers=headers,
            timeout=10,
        )
        detail_resp.raise_for_status()
    except requests.RequestException as exc:
        # Network/HTTP problems are reported but must not abort a batch run.
        logging.getLogger(__name__).warning(
            'Tianyancha lookup failed for %r: %s', company, exc)
        return email, phone

    detail_soup = BeautifulSoup(detail_resp.text, 'html.parser')

    # E-mail is read from the tag's ``data-email`` attribute; fall back to
    # '' when the attribute is absent so callers always get a string.
    email_tag = detail_soup.find('span', class_='email')
    if email_tag:
        email = email_tag.get('data-email') or ''

    # Phone number is the text content of the matching span.
    phone_tag = detail_soup.select_one(
        '.detail.web-show span[ng-if="baseInfoModel.phonenumber"]')
    if phone_tag:
        phone = phone_tag.text.strip()

    return email, phone
# Read the company names from an Excel file.
def read_from_excel(file_path):
    """Return the company names stored in the first column of a workbook.

    Reads the active worksheet of *file_path*, skipping row 1 (treated as a
    header row) and taking the first cell of every following row.

    Args:
        file_path: Path of the ``.xlsx`` workbook to read.

    Returns:
        A list of non-empty company names as stripped strings, in sheet
        order. Blank cells are skipped so callers never receive ``None``.
    """
    wb = openpyxl.load_workbook(file_path)
    ws = wb.active
    company_list = []
    # max_col=1: only the first column is used, so do not read the others.
    for (cell_value,) in ws.iter_rows(min_row=2, max_col=1, values_only=True):
        if cell_value is None:
            continue
        name = str(cell_value).strip()
        if name:
            company_list.append(name)
    return company_list
# Write each company's e-mail address and phone number to an Excel file.
def write_to_excel(company_list, output_path='company_info.xlsx', delay=0.5):
    """Look up every company and save the results as an Excel workbook.

    Creates a workbook with one sheet titled ``'Company Info'``, writes a
    header row, then one row per company holding the name and the e-mail /
    phone returned by ``get_info``.

    Args:
        company_list: Iterable of company names to look up.
        output_path: Where the workbook is saved. Defaults to
            ``'company_info.xlsx'``.
        delay: Seconds to pause between consecutive lookups so requests are
            not sent too frequently. Defaults to ``0.5``.

    Returns:
        None. The workbook is written to *output_path*.
    """
    wb = openpyxl.Workbook()
    ws = wb.active
    ws.title = 'Company Info'
    # Header row.
    ws.append(['Company Name', 'Email', 'Phone'])
    try:
        # Data rows, one per company.
        for index, company in enumerate(company_list):
            if index:
                # Pause between lookups only, not after the last one.
                sleep(delay)
            email, phone = get_info(company)
            ws.append([company, email, phone])
    finally:
        # Save even when a lookup raises, so rows gathered so far survive.
        wb.save(output_path)
if __name__ == '__main__':
    # Script entry point: load the names from the input workbook and write
    # the looked-up contact details to the output workbook.
    write_to_excel(read_from_excel('company_names.xlsx'))
```
希望对你有帮助!
阅读全文