python爬取链家租房数据
时间: 2024-02-17 11:58:03 浏览: 148
本项目利用Python的scrapy框架爬取链家网的上海市租房信息
以下是使用Python的requests和BeautifulSoup库爬取链家租房数据的示例代码(以北京为例):
```python
import requests
from bs4 import BeautifulSoup
# Request headers that make the request look like it comes from a desktop browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}

# City subdomain and the inclusive range of listing pages to crawl.
city = 'bj'  # Beijing
start_page = 1
end_page = 5

# Seconds to wait for each HTTP response; without a timeout requests.get
# can block indefinitely on a stalled connection.
request_timeout = 10


def _text(node):
    """Return the stripped text of *node*, or '' when the node is missing."""
    return node.text.strip() if node is not None else ''


def _following(node, tag, count):
    """Return *node* followed by the next ``count - 1`` *tag* elements.

    Each step is a ``find_next(tag)`` call on the previous element. Once a
    lookup comes back empty, the remaining slots are filled with None so the
    caller always receives exactly *count* items.
    """
    nodes = []
    for _ in range(count):
        nodes.append(node)
        node = node.find_next(tag) if node is not None else None
    return nodes


# Walk every listing page in the configured range.
for page in range(start_page, end_page + 1):
    url = f'https://{city}.lianjia.com/zufang/pg{page}/#contentList'
    response = requests.get(url, headers=headers, timeout=request_timeout)
    # Fail loudly on an HTTP error instead of parsing an error page and
    # silently printing nothing.
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    # Each listing is expected in a div with this class.
    house_list = soup.find_all('div', class_='content__list--item--main')
    for house in house_list:
        # Look up each anchor element once, then step forward from it.
        # NOTE(review): find_next() searches the rest of the document, not
        # just this listing, so a listing with fewer elements than expected
        # can pick up values from the following one -- verify the selectors
        # against the live page markup.
        first_link = house.find('a', class_='content__list--item--des')
        district, street, community = map(
            _text, _following(first_link, 'a', 3)
        )

        first_span = house.find('span', class_='content__list--item--des')
        floor, lift, area, toward, model = map(
            _text, _following(first_span, 'span', 5)
        )

        rent = _text(house.find('span', class_='content__list--item-price'))

        # Print the listing; fields that could not be found print as empty.
        print(f"城区名:{district}")
        print(f"街道名:{street}")
        print(f"小区名:{community}")
        print(f"楼层信息:{floor}")
        print(f"有无电梯:{lift}")
        print(f"面积:{area}")
        print(f"房屋朝向:{toward}")
        print(f"户型:{model}")
        print(f"租金:{rent}")
        print("")
```
阅读全文