import numpy as np import pandas as pd import matplotlib.pyplot as plt plt.rcParams['font.sans-serif'] = ["SimHei"] # 单使用会使负号显示错误 plt.rcParams['axes.unicode_minus'] = False # 把负号正常显示 # 读取北京房价数据 path = 'data.txt' data = pd.read_csv(path, header=None, names=['房子面积', '房子价格']) print(data.head(10)) print(data.describe()) # 绘制散点图 data.plot(kind='scatter', x='房子面积', y='房子价格') plt.show() def computeCost(X, y, theta): inner = np.power(((X * theta.T) - y), 2) return np.sum(inner) / (2 * len(X)) data.insert(0, 'Ones', 1) cols = data.shape[1] X = data.iloc[:,0:cols-1]#X是所有行,去掉最后一列 y = data.iloc[:,cols-1:cols]#X是所有行,最后一列 print(X.head()) print(y.head()) X = np.matrix(X.values) y = np.matrix(y.values) theta = np.matrix(np.array([0,0])) print(theta) print(X.shape, theta.shape, y.shape) def gradientDescent(X, y, theta, alpha, iters): temp = np.matrix(np.zeros(theta.shape)) parameters = int(theta.ravel().shape[1]) cost = np.zeros(iters) for i in range(iters): error = (X * theta.T) - y for j in range(parameters): term = np.multiply(error, X[:, j]) temp[0, j] = theta[0, j] - ((alpha / len(X)) * np.sum(term)) theta = temp cost[i] = computeCost(X, y, theta) return theta, cost alpha = 0.01 iters = 1000 g, cost = gradientDescent(X, y, theta, alpha, iters) print(g) print(computeCost(X, y, g)) x = np.linspace(data.Population.min(), data.Population.max(), 100) f = g[0, 0] + (g[0, 1] * x) fig, ax = plt.subplots(figsize=(12,8)) ax.plot(x, f, 'r', label='Prediction') ax.scatter(data.Population, data.Profit, label='Traning Data') ax.legend(loc=2) ax.set_xlabel('房子面积') ax.set_ylabel('房子价格') ax.set_title('北京房价拟合曲线图') plt.show()
时间: 2024-03-03 16:53:41 浏览: 127
import numpy as np import matplotlib.pyplot as plt import math
这段代码中有几个错误需要修正:
1. 代码中导入库的语句需要加上换行符`\`,否则会提示语法错误:
```
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
plt.rcParams['font.sans-serif'] = ["SimHei"] # 单使用会使负号显示错误
plt.rcParams['axes.unicode_minus'] = False # 把负号正常显示
# 读取北京房价数据
path = 'data.txt'
data = pd.read_csv(path, header=None, names=['房子面积', '房子价格'])
print(data.head(10))
print(data.describe())
```
2. `X` 和 `y` 赋值时,需要把 `X` 和 `y` 同时修改,否则会导致后面的代码出错:
```
data.insert(0, 'Ones', 1)
cols = data.shape[1]
X = data.iloc[:,0:cols-1] # X是所有行,去掉最后一列
y = data.iloc[:,cols-1:cols] # y是所有行,最后一列
print(X.head())
print(y.head())
X = np.matrix(X.values)
y = np.matrix(y.values)
theta = np.matrix(np.array([0,0]))
print(theta)
print(X.shape, theta.shape, y.shape)
```
3. `gradientDescent` 函数中的代码需要修改,需要将 `temp` 数组的大小调整为 `(1, parameters)`,否则会导致后面的代码出错:
```
def gradientDescent(X, y, theta, alpha, iters):
temp = np.matrix(np.zeros((1, parameters))) # 修改这行代码
parameters = int(theta.ravel().shape[1])
cost = np.zeros(iters)
for i in range(iters):
error = (X * theta.T) - y
for j in range(parameters):
term = np.multiply(error, X[:, j])
temp[0, j] = theta[0, j] - ((alpha / len(X)) * np.sum(term))
theta = temp
cost[i] = computeCost(X, y, theta)
return theta, cost
```
4. 绘制拟合曲线的代码中使用了 `data.Population` 和 `data.Profit`,而这个数据集中没有这些列名,应该使用正确的列名 `房子面积` 和 `房子价格`:
```
x = np.linspace(data['房子面积'].min(), data['房子面积'].max(), 100)
f = g[0, 0] + (g[0, 1] * x)
fig, ax = plt.subplots(figsize=(12,8))
ax.plot(x, f, 'r', label='Prediction')
ax.scatter(data['房子面积'], data['房子价格'], label='Traning Data')
ax.legend(loc=2)
ax.set_xlabel('房子面积')
ax.set_ylabel('房子价格')
ax.set_title('北京房价拟合曲线图')
plt.show()
```
阅读全文