提问:以下爬取链家二手房数据的代码无法运行(粘贴时换行和缩进全部丢失),恢复换行后的原始代码如下——注意其中 `get_house_info (url)` 缺少 `def` 关键字和冒号,`headers` 被误写成 `eaders`,函数体和 `return house_info_list` 的缩进全部丢失,`save_to_excel` 也没有定义:

```python
import requests
from bs4 import BeautifulSoup
import pandas as pd

get_house_info (url)   # 原文如此:缺少 def 和冒号
eaders = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')
house_list = soup.find_all('li', class_='clear')
house_info_list :list[dict[str,int]]= []
for i,house in enumerate(house_list,start=1):
info = {}
info['id'] = i
info['title'] = house.find('div', class_='title').text.strip()
info['positionInfo'] = house.find('div', class_='positionInfo').text.strip()
info['address'] = house.find('div', class_='address').text.strip()
info['followInfo'] = house.find('div', class_='followInfo').text.strip()
info['flood'] = house.find('div', class_='flood').text.strip()
info['priceInfo'] = house.find('div', class_='priceInfo').text.strip()
house_info_list.append(info)
return house_info_list

save_to_excel(house_info_list, file_name)
df = pd.DataFrame(house_info_list)
df.to_excel(file_name, index=False)

if __name__ == '__main__':
    url = 'https://nj.lianjia.com/ershoufang/' #修改链接为当地链接
    house_info_list = []
    for i in range(1, 21): # 收集20页的数据,大约400条
        page_url = f'{url}pg{i}/'
        house_info_list.extend(get_house_info(page_url))
    save_to_excel(house_info_list, 'house.xlsx')
```

问题:怎么修改 `get_house_info` 中 `return house_info_list` 的错误?(原问题中的 “return_info_house” 应为笔误。)
时间: 2024-01-21 11:03:47 浏览: 142
在原代码中,错误不止一处:`get_house_info (url)` 缺少 `def` 关键字和行尾冒号;`headers` 被误写成 `eaders`;函数体(包括 `for` 循环和 `return house_info_list`)的缩进全部丢失,`return` 应与 `for` 循环保持同级缩进、位于函数体内;此外 `save_to_excel` 函数被调用但从未定义。修正后的完整代码如下:
```
import requests
from bs4 import BeautifulSoup
import pandas as pd
def get_house_info(url):
    """Scrape one Lianjia second-hand-housing listing page.

    Parameters
    ----------
    url : str
        Full page URL, e.g. 'https://nj.lianjia.com/ershoufang/pg1/'.

    Returns
    -------
    list[dict[str, object]]
        One dict per listing with keys 'id' (int, 1-based position on the
        page) and the text fields 'title', 'positionInfo', 'address',
        'followInfo', 'flood', 'priceInfo'.  A missing <div> yields ''
        instead of crashing with AttributeError (the original called
        .text on the result of find(), which is None when absent).

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status (so we do not silently
        parse an error page as an empty listing set).
    """
    headers = {
        # Browser-like UA so the site does not serve a bot-block page.
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    # timeout: requests has NO default timeout; without one a stalled
    # connection would hang the whole 20-page crawl forever.
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, 'html.parser')

    def _text(node, css_class):
        # Text of the listing's <div class_=css_class>, '' when the div
        # is absent (some listings lack e.g. the 'address' block).
        div = node.find('div', class_=css_class)
        return div.text.strip() if div is not None else ''

    house_info_list: list[dict[str, object]] = []
    for i, house in enumerate(soup.find_all('li', class_='clear'), start=1):
        house_info_list.append({
            'id': i,
            'title': _text(house, 'title'),
            'positionInfo': _text(house, 'positionInfo'),
            'address': _text(house, 'address'),
            'followInfo': _text(house, 'followInfo'),
            'flood': _text(house, 'flood'),
            'priceInfo': _text(house, 'priceInfo'),
        })
    return house_info_list
def save_to_excel(data, file_name):
    """Persist the scraped listings to an Excel workbook.

    `data` is a list of per-listing dicts (as produced by
    get_house_info); `file_name` is the output path, e.g. 'house.xlsx'.
    The DataFrame's row index is omitted so the sheet holds only the
    listing columns.
    """
    frame = pd.DataFrame(data)
    frame.to_excel(file_name, index=False)
if __name__ == '__main__':
    # 修改链接为当地链接 (change the base URL to your local Lianjia site).
    url = 'https://nj.lianjia.com/ershoufang/'
    # 收集20页的数据,大约400条 — crawl pages 1..20 (~400 listings).
    house_info_list = []
    for page in range(1, 21):
        house_info_list.extend(get_house_info(f'{url}pg{page}/'))
    save_to_excel(house_info_list, 'house.xlsx')
```
注意缩进,并且需要定义`save_to_excel`函数。
阅读全文