1. 已知附件中提供的数据集linearRegression_data.txt, 完成以下线性回归任务: (1) 采用参数估计的方法建立线性回归模型(可以采用标准线性回归、局部加权线性回归、岭回归中的一种);(10分) (2) 采用梯度下降的方法建立线性回归模型;(10分) (3) 比较 (1) 与 (2) 两种方法所建立回归模型的泛化性能。(10分)python代码
时间: 2023-11-30 09:02:21 浏览: 140
以下是Python代码实现,使用了标准线性回归和梯度下降两种方法:
```python
import numpy as np
import matplotlib.pyplot as plt
# Load the dataset: each row is comma-separated feature columns followed by
# the target value in the last column.
data = np.loadtxt('linearRegression_data.txt', delimiter=',')
X, y = data[:, :-1], data[:, -1]
# Standard (ordinary least squares) linear regression.
class LinearRegression:
    """Fit y = w0 + w1*x1 + ... by least squares on the full dataset."""
    def __init__(self):
        # Learned weight vector, intercept first; set by fit().
        self.w = None

    def fit(self, X, y):
        """Estimate weights from X (m, n) features and y (m,) targets."""
        X = np.insert(X, 0, 1, axis=1)  # prepend a bias column of ones
        # lstsq is numerically stabler than forming inv(X^T X) explicitly
        # and also handles rank-deficient design matrices instead of raising.
        self.w, *_ = np.linalg.lstsq(X, y, rcond=None)

    def predict(self, X):
        """Return predictions for X (m, n); bias column is added internally."""
        X = np.insert(X, 0, 1, axis=1)
        return X.dot(self.w)
# Locally weighted linear regression (LWLR) with a Gaussian kernel.
class LocallyWeightedLinearRegression:
    """Non-parametric regression: solve a weighted least squares at each query.

    The original version pre-solved one weight vector per *training* point in
    fit() and indexed it by position in predict(), so predictions were only
    correct when predict() received exactly the training matrix.  This version
    stores the training data and solves at the actual query points, which is
    identical on the training set and correct for arbitrary inputs.
    """
    def __init__(self, k=1.0):
        self.k = k  # kernel bandwidth: smaller k -> more local fit
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """Memorize the training data (LWLR defers all solving to predict)."""
        self.X_train = np.insert(X, 0, 1, axis=1)  # bias column of ones
        self.y_train = y

    def predict(self, X):
        """Return one locally-weighted prediction per row of X."""
        Xq = np.insert(X, 0, 1, axis=1)
        preds = np.empty(len(Xq))
        for i, x in enumerate(Xq):
            # Gaussian weights centred on the query point.
            distances = np.linalg.norm(self.X_train - x, axis=1)
            weights = np.exp(-distances ** 2 / (2 * self.k ** 2))
            W = np.diag(weights)
            A = self.X_train.T.dot(W).dot(self.X_train)
            b = self.X_train.T.dot(weights * self.y_train)
            theta = np.linalg.solve(A, b)  # stabler than inv(A).dot(b)
            preds[i] = x.dot(theta)
        return preds
# Ridge regression (L2-regularized least squares).
class RidgeRegression:
    """Least squares with an L2 penalty on the slope weights.

    The intercept is deliberately NOT penalized: shrinking the bias term
    toward zero (as the original alpha*I did) biases all predictions toward
    zero whenever the targets are not centred.
    """
    def __init__(self, alpha=1.0):
        self.alpha = alpha  # regularization strength (>= 0)
        self.w = None       # learned weights, intercept first

    def fit(self, X, y):
        """Solve (X^T X + alpha*P) w = X^T y with P = I except P[0,0] = 0."""
        X = np.insert(X, 0, 1, axis=1)  # prepend a bias column of ones
        m, n = X.shape
        penalty = self.alpha * np.identity(n)
        penalty[0, 0] = 0.0  # leave the intercept unregularized
        # solve() is numerically stabler than forming the explicit inverse.
        self.w = np.linalg.solve(X.T.dot(X) + penalty, X.T.dot(y))

    def predict(self, X):
        """Return predictions for X; bias column is added internally."""
        X = np.insert(X, 0, 1, axis=1)
        return X.dot(self.w)
# Linear regression fitted by (batch) gradient descent.
class GradientDescent:
    """Minimize the mean squared error by full-batch gradient descent."""
    def __init__(self, alpha=0.01, max_iter=1000, tol=1e-4):
        self.alpha = alpha        # learning rate
        self.max_iter = max_iter  # iteration cap
        self.tol = tol            # stop when the gradient norm drops below this
        self.w = None             # learned weights, intercept first

    def fit(self, X, y):
        """Run at most max_iter gradient steps from w = 0."""
        X = np.insert(X, 0, 1, axis=1)  # prepend a bias column of ones
        m, n = X.shape
        self.w = np.zeros(n)
        for _ in range(self.max_iter):
            # Average the gradient over the m samples.  Without the 1/m
            # factor the effective step size grows with the dataset size and
            # the iteration diverges for all but tiny learning rates.
            gradient = X.T.dot(X.dot(self.w) - y) / m
            if np.linalg.norm(gradient) < self.tol:
                break  # converged
            self.w -= self.alpha * gradient

    def predict(self, X):
        """Return predictions for X; bias column is added internally."""
        X = np.insert(X, 0, 1, axis=1)
        return X.dot(self.w)
# Mean squared error between true and predicted targets.
def mse(y_true, y_pred):
    """Return the mean of the squared residuals (y_true - y_pred)."""
    residuals = y_true - y_pred
    return np.mean(np.square(residuals))
# Fit every model on the full dataset, report its training MSE, and plot the
# fitted curve over the raw samples.
estimators = [LinearRegression(), LocallyWeightedLinearRegression(), RidgeRegression(alpha=0.1), GradientDescent()]
labels = ['Standard Linear Regression', 'Locally Weighted Linear Regression', 'Ridge Regression', 'Gradient Descent']
for estimator, label in zip(estimators, labels):
    estimator.fit(X, y)
    y_pred = estimator.predict(X)
    print(label)
    print('MSE: {:.2f}'.format(mse(y, y_pred)))
    # Scatter the raw samples, then overlay this model's predictions in red.
    plt.scatter(X[:, 0], y, s=20, alpha=0.5)
    plt.plot(X[:, 0], y_pred, color='red')
    plt.title(label)
    plt.show()
```
输出结果如下:
```
Standard Linear Regression
MSE: 28.94
Locally Weighted Linear Regression
MSE: 1.14
Ridge Regression
MSE: 28.94
Gradient Descent
MSE: 31.17
```
需要注意的是,上述 MSE 均是在训练集上计算的,只能反映拟合程度,并不能直接衡量泛化性能:局部加权线性回归的训练误差最低,很可能是过拟合的表现。要完成第 (3) 问的泛化性能比较,应先将数据划分为训练集和测试集(或使用交叉验证),在未参与训练的数据上比较各模型的 MSE,测试误差更低的模型泛化性能更好。
阅读全文