area = re.findall(r'\d+[.]?\d*', Soup.find('p', class_='content__list--item--des').text.strip())[0]
pattern = re.findall(r'(\d+)室(\d+)厅(\d+)卫', Soup.find('p', class_='content__list--item--des').text.strip())
如何将以上 area 和 pattern 转化为字符串类型？

你的问题是如何将这两个表达式的结果转化为字符串类型。可以使用 str() 函数进行转换，代码如下：

```python
area_str = str(re.findall(r'\d+[.]?\d*', Soup.find('p', class_='content__list--item--des').text.strip())[0])
pattern_str = str(re.findall(r'(\d+)室(\d+)厅(\d+)卫', Soup.find('p', class_='content__list--item--des').text.strip()))
```

这样就可以将 area 和 pattern 转化为字符串类型了。需要注意：re.findall 在正则没有分组时返回字符串列表，因此取 [0] 之后的 area 本身已经是字符串；而 pattern 的正则含有三个分组，返回的是由元组组成的列表，str() 得到的是整个列表的文本表示（例如 "[('3', '1', '1')]"）。

import requests
from bs4 import BeautifulSoup
import pandas as pd
import xlrd


def read_excel(filename):
    """Read the community names from an Excel workbook.

    Args:
        filename: Path of the workbook; it must contain a '小区名' column.

    Returns:
        The non-empty values of the '小区名' column as a list of strings.
    """
    data = pd.read_excel(filename)
    # Blank cells come back as NaN and numeric cells as numbers; either one
    # would break the string concatenation done with each name later on.
    return data['小区名'].dropna().astype(str).tolist()


def _field_text(house, css_class):
    """Return the stripped text of the first matching <div>, or '' if absent."""
    node = house.find('div', class_=css_class)
    return node.text.strip() if node is not None else ''


def crawl_data(area):
    """Fetch the second-hand housing search page for one community and parse it.

    Args:
        area: Community name. It is appended to the search URL and also sent
            as the ``kw`` query parameter.

    Returns:
        A list of dicts, one per ``<div class="info clear">`` element found in
        the response, with the keys 'title', 'address', 'house_info',
        'priceInfo' and 'followInf'. A field whose <div> is missing from a
        listing is stored as an empty string.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    url = 'https://wx.ke.com/ershoufang/rs' + area  # Wuxi second-hand housing page
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36 Edge/16.16299'
    }
    params = {
        'kw': area
    }
    # Without a timeout a stalled connection would block the whole run.
    response = requests.get(url, headers=headers, params=params, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Parse the listing entries out of the page.
    result = []
    for house in soup.find_all('div', class_='info clear'):
        result.append({
            'title': _field_text(house, 'title'),
            'address': _field_text(house, 'address'),
            'house_info': _field_text(house, 'houseInfo'),
            'priceInfo': _field_text(house, 'priceInfo'),
            # NOTE: the key is spelled 'followInf' on purpose; it is the
            # column name already written to the output spreadsheets.
            'followInf': _field_text(house, 'followInfo'),
        })
    return result


def save_to_excel(data, filename):
    """Write the scraped rows to an Excel file.

    Args:
        data: List of row dicts as returned by ``crawl_data``.
        filename: Path of the workbook to create.
    """
    df = pd.DataFrame(data)
    df.to_excel(filename, index=False)


# Script entry point: scrape every community listed in the input workbook and
# write one output workbook per community.
if __name__ == '__main__':
    areas = read_excel('小区名.xlsx')
    for area in areas:
        print('正在爬取：', area)
        data = crawl_data(area)
        save_to_excel(data, area + '.xlsx')
    print('爬取完成！')

这段代码是一个简单的爬虫程序，用来爬取指定小区的二手房信息，并将数据保存到 Excel 文件中。程序首先通过读取 Excel 文件中的小区名字，依次对每个小区进行爬取。爬取时，程序将小区名字作为参数拼接到目标 URL 中，然后使用 requests 库发送 GET 请求获取 HTML 页面，再使用 BeautifulSoup 库解析 HTML 页面，提取出需要的信息，并将结果保存到一个列表中。最后，程序将列表中的数据保存到 Excel 文件中。

import requests from bs4 import BeautifulSoup url = 'https://nantong.anjuke.com/sale/chongchuan/?from=HomePage_TopBar' # 获取网页内容 response = requests.get(url) html = response.content.decode('utf-8') # 解析网页内容 soup = BeautifulSoup(html, 'html.parser') house_list = soup.find_all('li', {'class': 'list-item'}) # 遍历房源信息 for house in house_list: # 房型 house_type = house.find('div', {'class': 'house-details'}).find_all('span')[0].text.strip() # 面积 area = house.find('div', {'class': 'house-details'}).find_all('span')[1].text.strip() # 小区 community = house.find('div', {'class': 'house-details'}).find_all('span')[2].text.strip() # 区域地点 location = house.find('div', {'class': 'house-details'}).find_all('span')[3].text.strip() # 总价 total_price = house.find('span', {'class': 'price-det'}).text.strip() # 每平方单价 unit_price = house.find('span', {'class': 'unit-price'}).text.strip() # 输出采集的数据 print('房型：', house_type) print('面积：', area) print('小区：', community) print('区域地点：', location) print('总价：', total_price) print('每平方单价：', unit_price) print('')
请以上述代码为基础，完成对数据的清洗、处理：将面积、总价、每平方单价中的单位去除，并将数据转换为浮点型；将区域地点中的区和路名去除，仅保留中间的区域（例：取“崇川-北大街-幸福大道”中的“北大街”）。输出结果。

import re

import requests
from bs4 import BeautifulSoup

url = 'https://nantong.anjuke.com/sale/chongchuan/?from=HomePage_TopBar'

# Matches the first integer or decimal number in a piece of text.
_NUMBER_RE = re.compile(r'\d+(?:\.\d+)?')


def _node_text(node):
    """Return the stripped text of a parsed node, or '' when the node is None."""
    return node.text.strip() if node is not None else ''


def _to_float(text):
    """Return the first number found in *text* as a float, or None if there is none.

    Extracting the digits directly drops whatever unit text surrounds the
    number, without depending on how many characters the unit occupies.
    """
    match = _NUMBER_RE.search(text)
    return float(match.group()) if match else None


def _middle_region(location):
    """Return the second '-'-separated part of *location*.

    For example '崇川-北大街-幸福大道' yields '北大街'. A value without a
    hyphen is returned unchanged instead of raising IndexError.
    """
    parts = [part.strip() for part in location.split('-')]
    return parts[1] if len(parts) > 1 else location


# Fetch the page; the timeout keeps a stalled connection from hanging the script.
response = requests.get(url, timeout=10)
html = response.content.decode('utf-8')

# Parse the page and collect the listing entries.
soup = BeautifulSoup(html, 'html.parser')
house_list = soup.find_all('li', {'class': 'list-item'})

for house in house_list:
    # Look the details block up once instead of once per field.
    details = house.find('div', {'class': 'house-details'})
    spans = details.find_all('span') if details is not None else []
    if len(spans) < 4:
        # The four expected spans are missing, so there is nothing to clean.
        continue

    house_type = spans[0].text.strip()
    area = _to_float(spans[1].text)  # unit removed, converted to float
    community = spans[2].text.strip()
    location = _middle_region(spans[3].text.strip())  # keep the middle region only
    total_price = _to_float(_node_text(house.find('span', {'class': 'price-det'})))
    unit_price = _to_float(_node_text(house.find('span', {'class': 'unit-price'})))

    print('房型：', house_type)
    print('面积：', area)
    print('小区：', community)
    print('区域地点：', location)
    print('总价：', total_price)
    print('每平方单价：', unit_price)
    print('')

阅读全文

area = re.findall(r'\d+[.]?\d*', Soup.find('p', class_='content__list--item--des').text.strip())[0]
pattern = re.findall(r'(\d+)室(\d+)厅(\d+)卫', Soup.find('p', class_='content__list--item--des').text.strip())
如何将以上 area 和 pattern 转化为字符串类型？

相关推荐

Python爬虫利器二之Beautiful Soup的用法.zip_python_爬虫_爬虫 python_爬虫 pyth

soupui.zip_soupui_分数阶梯度_多径

Network_Reptile.rar_爬虫_爬虫 评论_爬虫评论_简单爬虫_评论

怎么从 不限 d 大兴其它 m 马驹桥 t 通州其它 y 亦庄 亦庄开发区其它 这段代码中把href属性值全部提取出来

给这串代码加上省份数据city_data = hotel_data.groupby('city').agg({'hotel_name': 'count', 'room_count': 'sum'}).sort_values(by='hotel_name', ascending=False)

【Django GIS与REST API】：构建基于django.contrib.gis.gdal.field的REST API，一步到位

怎么从 不限 东城 这段代码中把href属性值提取出来

编写程序，从“人才热线”网站爬取与“python”有关的招聘岗位列表信息（网页地址为：https://s.cjol.com/kw-python/?SearchType=3）。

用python编写爬取海南招标网站中标公告的代码，爬取页数为10，字段为链接和标题和发布时间和中标金额和全文，并写入excel或csv，网址为https://www.ccgp-hainan.gov.cn/cgw/cgw_list.jsp

http://cs.sofang.com/new/area使用bs4进行解析

如何从网络上抓取携程网关于湖北省黄冈市遗爱湖公园的具体用户评论，该页面链接为：https://you.ctrip.com/sight/huanggang859/127001.html?renderPlatform=

用Python从指定网站（https://travel.qunar.com/p-cs300195-hangzhou-meishi）中爬取需要的信息，包括页面的餐厅名称、星级、地址等信息，结果要求保存在与代码文件相同地址的目录下，文件名称为“qunar.csv”。

爬虫作业 爬http://www.nmc.cn/publish/agro/soil-moisture-monitoring-10cm.html中的数据以及图片

需求：爬取网址中的信息 https://www.58.com/ershoufang/

访问https://m.111.com.cn/yyw/activities/broadcast/#/home 用python获取以下数据内容：国内各地区疫情：新增、累计、治愈、死亡； 国外各国疫情：新增、累计、治愈、死亡

利用 Python 爬取 https://www.liepin.com/company-jobs/954482/ 的招聘信息的源代码

大家在看

dmx512无线舞台灯光系统

SIMATIC S71200和1500安全编程指南

INCA用的A2L文件生成脚本

计算机组成原理课程设计复杂模型机设计实现冒泡排序

CMOS反相器的掩膜版图-集成电路版图设计

最新推荐

探索zinoucha-master中的0101000101奥秘

【Qt与OpenGL集成】：提升框选功能图形性能，OpenGL的高效应用案例

ffmpeg 指定屏幕输出

个人网站技术深度解析：Haskell构建、黑暗主题、并行化等

Qt框选功能的国际化实践：支持多语言界面的核心技术解析

内网如何运行docker pull mysql:5.7

ImgToString开源工具：图像转字符串轻松实现

Qt框选功能安全性增强指南：防止恶意操作的有效策略

在ros平台中实现人脸识别

fildes前端开源库：对fs模块的创新实践

Network_Reptile.rar_爬虫_爬虫评论_爬虫评论_简单爬虫_评论

怎么从不限 d 大兴其它 m 马驹桥 t 通州其它 y 亦庄亦庄开发区其它这段代码中把href属性值全部提取出来

怎么从不限东城这段代码中把href属性值提取出来

爬虫作业爬http://www.nmc.cn/publish/agro/soil-moisture-monitoring-10cm.html中的数据以及图片

访问https://m.111.com.cn/yyw/activities/broadcast/#/home 用python获取以下数据内容：国内各地区疫情：新增、累计、治愈、死亡；国外各国疫情：新增、累计、治愈、死亡