乳腺癌数据集是机器学习的经典数据集之一,接下来请使用这个数据集完成线性回归底层实现及决策树回归模型的训练:1、调用 sklearn 包加载乳腺癌数据集;2、获取特征与标签矩阵;3、进行标准化;4、洗牌;5、切分训练集和测试集;6、创建线性模型;7、创建代价函数;8、创建梯度下降函数;9、创建精度函数;10、训练模型,画出代价函数曲线;11、输出测试集精度;12、创建决策树分类模型;13、模型深度为 5;14、训练模型;15、输出测试集预测值;16、输出测试集精度。
时间: 2023-07-10 07:35:07 浏览: 166
好的,以下是基于乳腺癌数据集的线性回归底层及决策树回归模型的训练代码示例:
```python
# 导入所需的库
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
import numpy as np
# Load the breast cancer dataset and pull out the feature matrix and labels
cancer = load_breast_cancer()
X, Y = cancer.data, cancer.target
# Standardize each feature column: subtract its mean, divide by its std
X = (X - X.mean(axis=0)) / X.std(axis=0)
# Shuffle samples and labels with one shared, seeded permutation so that
# every row of X stays paired with its label
np.random.seed(0)
shuffle_index = np.random.permutation(X.shape[0])
X = X[shuffle_index]
Y = Y[shuffle_index]
# Hold out 20% of the samples as the test set
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42
)
# Linear regression implemented from scratch with batch gradient descent
class LinearRegression:
    """Linear model ``X @ weights + bias`` fitted by batch gradient descent.

    Parameters
    ----------
    lr : float
        Step size applied to every gradient update.
    n_iters : int
        Number of gradient-descent iterations performed by ``fit``.

    Attributes
    ----------
    weights : numpy.ndarray or None
        One coefficient per feature; ``None`` until ``fit`` is called.
    bias : float or None
        Intercept term; ``None`` until ``fit`` is called.
    """

    def __init__(self, lr=0.01, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, Y):
        """Fit the model to ``X`` (n_samples, n_features) and targets ``Y``.

        ``Y`` is flattened to 1-D first: a column vector of shape (n, 1)
        would otherwise broadcast against the (n,) predictions and produce
        an (n, n) residual matrix instead of an (n,) residual vector.

        Returns
        -------
        LinearRegression
            The fitted instance itself, so calls can be chained.
        """
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y, dtype=float).ravel()
        n_samples, n_features = X.shape

        # Start from all-zero parameters.
        self.weights = np.zeros(n_features)
        self.bias = 0.0

        for _ in range(self.n_iters):
            residual = np.dot(X, self.weights) + self.bias - Y
            # Gradients of the half mean squared error w.r.t. weights / bias.
            dw = (1 / n_samples) * np.dot(X.T, residual)
            db = (1 / n_samples) * np.sum(residual)
            self.weights -= self.lr * dw
            self.bias -= self.lr * db
        return self

    def predict(self, X):
        """Return the model output ``X @ weights + bias`` for each row of X."""
        return np.dot(X, self.weights) + self.bias
# Instantiate the linear model with its default hyperparameters spelled out
lr = LinearRegression(lr=0.01, n_iters=1000)
# Cost function: half mean squared error
def cost_function(Y_pred, Y_true):
    """Return the half mean squared error between predictions and targets.

    Computes ``sum((Y_pred - Y_true) ** 2) / (2 * n_samples)`` where
    ``n_samples`` is ``len(Y_true)``.

    Parameters
    ----------
    Y_pred, Y_true : array-like
        Predicted and true values; plain Python sequences are accepted
        because both are converted to NumPy arrays first.

    Raises
    ------
    ZeroDivisionError
        If ``Y_true`` is empty.
    """
    # Coerce so that list inputs support element-wise subtraction.
    Y_pred = np.asarray(Y_pred, dtype=float)
    Y_true = np.asarray(Y_true, dtype=float)
    n_samples = len(Y_true)
    return (1 / (2 * n_samples)) * np.sum((Y_pred - Y_true) ** 2)
# Gradient descent that also records the cost at every iteration
def gradient_descent(X, Y, lr, n_iters):
    """Run batch gradient descent for a linear model and track the cost.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix.
    Y : array-like
        Targets; flattened to 1-D so that a column vector of shape (n, 1)
        does not broadcast the residual into an (n, n) matrix.
    lr : float
        Step size applied to every update.
    n_iters : int
        Number of iterations to run.

    Returns
    -------
    tuple
        ``(weights, bias, costs)``. ``costs[i]`` is the value of
        ``cost_function`` for the parameters in effect at the start of
        iteration ``i``, i.e. before that iteration's update is applied.
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float).ravel()
    n_samples, n_features = X.shape

    # Start from all-zero parameters.
    weights = np.zeros(n_features)
    bias = 0.0
    costs = []

    for _ in range(n_iters):
        Y_pred = np.dot(X, weights) + bias
        costs.append(cost_function(Y_pred, Y))

        residual = Y_pred - Y
        dw = (1 / n_samples) * np.dot(X.T, residual)
        db = (1 / n_samples) * np.sum(residual)
        weights -= lr * dw
        bias -= lr * db
    return weights, bias, costs
# Accuracy: fraction of predictions that exactly match the true labels
def accuracy(Y_pred, Y_true):
    """Return the fraction of entries where ``Y_pred`` equals ``Y_true``.

    Both inputs are converted to flat NumPy arrays before comparing.
    Without that, two plain lists would compare as a single boolean, and
    an (n, 1) array against an (n,) array would broadcast to an (n, n)
    comparison; either case yields a wrong score.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of elements.
    """
    Y_pred = np.asarray(Y_pred).ravel()
    Y_true = np.asarray(Y_true).ravel()
    if Y_pred.shape != Y_true.shape:
        raise ValueError(
            f"Y_pred has {Y_pred.size} elements but Y_true has {Y_true.size}"
        )
    return np.sum(Y_pred == Y_true) / len(Y_true)
# Train the linear regression model and plot its cost curve
import matplotlib.pyplot as plt

lr.fit(X_train, Y_train)
Y_pred_train = lr.predict(X_train)
Y_pred_test = lr.predict(X_test)
# gradient_descent applies the same zero-initialised updates as lr.fit and
# additionally records the cost per iteration; reuse the model's own
# hyperparameters so the plotted curve belongs to the model scored below.
costs = gradient_descent(X_train, Y_train, lr=lr.lr, n_iters=lr.n_iters)[-1]
plt.plot(costs)
plt.title('Cost Function')
plt.xlabel('Iterations')
plt.ylabel('Cost')
plt.show()
# Report test-set accuracy. The regression output is continuous, so map it to
# a 0/1 label with a 0.5 threshold; rounding instead would turn outputs such
# as 1.6 or -0.7 into the labels 2 and -1, which can never match a target.
Y_label_test = (Y_pred_test >= 0.5).astype(int)
print('Linear Regression Accuracy:', accuracy(Y_label_test, Y_test))
# Build a decision tree regressor limited to depth 5 and train it on the
# training split (fit returns the estimator, so both steps are chained)
tree_reg = DecisionTreeRegressor(max_depth=5).fit(X_train, Y_train)
# Predict on the test split and print the raw regression outputs
Y_pred_test = tree_reg.predict(X_test)
print('Decision Tree Regression Prediction:', Y_pred_test)
# Round the outputs to the nearest integer label and report test accuracy
print('Decision Tree Regression Accuracy:', accuracy(Y_pred_test.round(), Y_test))
```
希望这个示例代码对你有所帮助!
阅读全文