帮我用 Python 写一个爬取企查查公司名称和公司地址的爬虫
时间: 2024-06-10 18:06:20 浏览: 201
基于Python的企查查爬虫,爬取完整的公司数据+源代码+文档说明
5星 · 资源好评率100%
好的,以下是Python代码:
```python
import requests
from bs4 import BeautifulSoup
# Base URL of the site; request paths are appended to it.
url = 'https://www.qichacha.com/'

# HTTP headers sent with every request: a desktop-browser User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    ),
}
# Search for companies by keyword
def search(keyword):
    """Search the site for companies matching *keyword*.

    Args:
        keyword: Search term, e.g. a company name. It is percent-encoded
            before being sent, so non-ASCII text and characters such as
            ``&`` or ``#`` are safe.

    Returns:
        A list of dicts with the keys ``'name'``, ``'detail_url'`` and
        ``'address'``, one per result row that could be parsed. The list is
        empty when the page contains no ``m_srchList`` table.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within 10 seconds.
    """
    # Let requests build the query string so the keyword is URL-encoded.
    r = requests.get(
        url + 'search',
        params={'key': keyword},
        headers=headers,
        timeout=10,
    )
    # Surface blocked/failed requests instead of parsing an error page.
    r.raise_for_status()
    soup = BeautifulSoup(r.text, 'html.parser')

    result_table = soup.find('table', {'class': 'm_srchList'})
    if result_table is None:
        # No results table on the page: nothing to report.
        return []

    results = []
    # The first <tr> is skipped (presumably the header row -- verify against
    # the live page markup).
    for row in result_table.find_all('tr')[1:]:
        cells = row.find_all('td')
        if len(cells) < 3:
            # Not a regular result row; it lacks the name/address cells.
            continue
        link = cells[1].a
        if link is None or link.get('href') is None:
            continue
        results.append({
            'name': link.text.strip(),
            'detail_url': link['href'],
            'address': cells[2].text.strip(),
        })
    return results
# Fetch a company's detail page
def get_detail(detail_url):
    """Download *detail_url* and return the page as a parsed document.

    Args:
        detail_url: Absolute URL of the company detail page.

    Returns:
        A ``BeautifulSoup`` object built from the response body with the
        ``html.parser`` backend.

    Raises:
        requests.Timeout: If the server does not respond within 10 seconds.
    """
    # Without a timeout, requests may block indefinitely on a stalled server.
    r = requests.get(detail_url, headers=headers, timeout=10)
    soup = BeautifulSoup(r.text, 'html.parser')
    return soup
# Parse a company detail page and extract the legal representative,
# registered capital, etc.
def parse_detail(soup):
    """Extract the legal representative and registered capital from a page.

    The text of the fourth ``<div class="content">`` element is split into
    non-empty, stripped lines. The line that follows the label
    ``法定代表人`` is taken as the legal representative, and the line that
    follows ``注册资本`` as the registered capital. If a label occurs more
    than once, the last occurrence wins.

    Args:
        soup: Parsed detail page; must provide ``find_all(name, attrs)``
            returning elements that have ``get_text()``.

    Returns:
        A dict with the keys ``'legal_representative'`` and
        ``'registered_capital'``. A value is ``''`` when its label is
        missing, when the label is the last line (no value follows it), or
        when the page has fewer than four ``content`` divs.
    """
    info = {
        'legal_representative': '',
        'registered_capital': '',
    }

    content_blocks = soup.find_all('div', {'class': 'content'})
    # Index 3 is the block this scraper targets (presumably the basic-info
    # panel -- verify against the live page markup).
    if len(content_blocks) < 4:
        return info

    lines = [
        line.strip()
        for line in content_blocks[3].get_text().split('\n')
        if line.strip()
    ]

    # Walk (label, next line) pairs; a label on the final line has no pair,
    # so it is ignored instead of indexing past the end of the list.
    for label, value in zip(lines, lines[1:]):
        if label == '法定代表人':
            info['legal_representative'] = value
        elif label == '注册资本':
            info['registered_capital'] = value
    return info
# Test code: search for a keyword and print the details of every result.
if __name__ == '__main__':
    from urllib.parse import urljoin

    keyword = '百度'
    results = search(keyword)
    for result in results:
        # urljoin resolves the href against the base URL. Plain string
        # concatenation produced '//' for root-relative hrefs (the base
        # ends with '/') and a broken URL for absolute hrefs.
        detail_url = urljoin(url, result['detail_url'])
        soup = get_detail(detail_url)
        detail_info = parse_detail(soup)
        print('公司名称:', result['name'])
        print('公司地址:', result['address'])
        print('法定代表人:', detail_info['legal_representative'])
        print('注册资本:', detail_info['registered_capital'])
        print('\n')
```
将关键词改成你需要查询的公司名称,运行以上代码,即可获取搜索结果中每家公司的名称、地址、法定代表人、注册资本等信息。
阅读全文