以下面的代码为基础:

import requests
from bs4 import BeautifulSoup

url = 'https://nantong.anjuke.com/sale/chongchuan/?from=HomePage_TopBar'

# 获取网页内容
response = requests.get(url)
html = response.content.decode('utf-8')

# 解析网页内容
soup = BeautifulSoup(html, 'html.parser')
house_list = soup.find_all('li', {'class': 'list-item'})

# 遍历房源信息
for house in house_list:
    # 房型
    house_type = house.find('div', {'class': 'house-details'}).find_all('span')[0].text.strip()
    # 面积
    area = house.find('div', {'class': 'house-details'}).find_all('span')[1].text.strip()
    # 小区
    community = house.find('div', {'class': 'house-details'}).find_all('span')[2].text.strip()
    # 区域地点
    location = house.find('div', {'class': 'house-details'}).find_all('span')[3].text.strip()
    # 总价
    total_price = house.find('span', {'class': 'price-det'}).text.strip()
    # 每平方单价
    unit_price = house.find('span', {'class': 'unit-price'}).text.strip()
    # 输出采集的数据
    print('房型:', house_type)
    print('面积:', area)
    print('小区:', community)
    print('区域地点:', location)
    print('总价:', total_price)
    print('每平方单价:', unit_price)
    print('')

完成对数据的清洗、处理:将面积、总价、每平方单价中的单位去除,并将数据转换为浮点型;将区域地点中的区和路名去除,仅保留中间的区域(例:取“崇川-北大街-幸福大道”中的“北大街”)。输出结果。
时间: 2023-09-19 18:07:19 浏览: 154
import sys import os import urllib from bs4 import BeautifulSoup
import requests
from bs4 import BeautifulSoup
import re

# Listing page that is scraped; kept under its original module-level name.
url = 'https://nantong.anjuke.com/sale/chongchuan/?from=HomePage_TopBar'

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10

# First unsigned decimal number in a string, e.g. "89.5" in "89.5m²".
_NUMBER_RE = re.compile(r'\d+(?:\.\d+)?')

# Separators accepted between the parts of a location string.
_DASH_RE = re.compile(r'\s*[-－–—]\s*')


def extract_number(text):
    """Return the first number found in *text* as a float, or None.

    Thousands separators (",") are ignored. Using a pattern rather than
    fixed slice offsets keeps the result correct whatever the length of the
    surrounding unit text ("m²", "万", "元/m²", ...).
    """
    match = _NUMBER_RE.search(text.replace(',', ''))
    return float(match.group()) if match else None


def extract_middle_region(text):
    """Return the middle part of a "district-region-road" location string.

    For "崇川-北大街-幸福大道" this returns "北大街". When *text* contains no
    separator, the stripped text itself is returned instead of raising.
    """
    parts = [part for part in _DASH_RE.split(text.strip()) if part]
    if not parts:
        return ''
    return parts[1] if len(parts) > 1 else parts[0]


def parse_house(house):
    """Extract the cleaned fields of one listing element.

    *house* is a BeautifulSoup tag for a single listing item. Returns a dict
    with the keys ``house_type``, ``area``, ``community``, ``location``,
    ``total_price`` and ``unit_price``, or None when the expected tags are
    missing (for example an advertisement item without price information).
    """
    details = house.find('div', {'class': 'house-details'})
    price_tag = house.find('span', {'class': 'price-det'})
    unit_tag = house.find('span', {'class': 'unit-price'})
    if details is None or price_tag is None or unit_tag is None:
        return None

    # Look the spans up once instead of once per field.
    spans = details.find_all('span')
    if len(spans) < 4:
        return None
    # Assumes the first four spans are type, area, community and location,
    # in that order, exactly as the original script indexed them.
    house_type, area, community, location = (
        span.text.strip() for span in spans[:4]
    )

    return {
        'house_type': house_type,
        'area': extract_number(area),
        'community': community,
        'location': extract_middle_region(location),
        'total_price': extract_number(price_tag.text),
        'unit_price': extract_number(unit_tag.text),
    }


def main():
    """Download the listing page and print the cleaned data of every listing."""
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on an HTTP error rather than parsing an error page.
    response.raise_for_status()
    html = response.content.decode('utf-8')

    soup = BeautifulSoup(html, 'html.parser')
    for house in soup.find_all('li', {'class': 'list-item'}):
        record = parse_house(house)
        if record is None:
            # Skip items that do not look like a regular listing.
            continue
        print('房型:', record['house_type'])
        print('面积:', record['area'])
        print('小区:', record['community'])
        print('区域地点:', record['location'])
        print('总价:', record['total_price'])
        print('每平方单价:', record['unit_price'])
        print('')


if __name__ == '__main__':
    main()
阅读全文