Without using sklearn's classifier, apply logistic regression to the breast cancer dataset and implement the following performance metrics yourself in Python: confusion matrix, accuracy, precision, recall, and F1 score.
Sure. Below is a NumPy-based implementation of logistic regression, along with hand-written computations of the confusion matrix, accuracy, precision, recall, and F1 score.
First, we need to load the breast cancer dataset. Here I use the dataset bundled with sklearn, but only for loading the data; the model and all metrics are implemented from scratch. The code is as follows:
```python
from sklearn.datasets import load_breast_cancer
import numpy as np

# Load the dataset
data = load_breast_cancer()

# Extract features and labels
X = data.data
y = data.target

# Reshape the labels into a column vector
y = y.reshape(-1, 1)

# Shuffle the samples before splitting; the raw dataset is not randomly
# ordered, so splitting by index alone can give skewed class ratios
rng = np.random.default_rng(42)
perm = rng.permutation(X.shape[0])
X, y = X[perm], y[perm]

# Split into training and test sets with an 8:2 ratio
train_size = int(X.shape[0] * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Standardize features using the training-set statistics
mean = X_train.mean(axis=0)
std = X_train.std(axis=0)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std
```
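As an optional sanity check (not part of the original answer), the snippet below prints the split shapes and the positive-class rate in each split; it assumes only the variables defined above and helps confirm that the split and standardization behaved as expected:

```python
# Optional sanity check on the train/test split
print("Train:", X_train.shape, " Test:", X_test.shape)
print("Positive rate in train: %.3f" % y_train.mean())
print("Positive rate in test:  %.3f" % y_test.mean())
# After standardization the training-feature means should be close to 0
print("Feature means after standardization (first 3):", X_train.mean(axis=0)[:3])
```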
Next, we implement the logistic regression model itself:
```python
class LogisticRegression:
    def __init__(self, lr=0.01, num_iter=100000, fit_intercept=True):
        self.lr = lr
        self.num_iter = num_iter
        self.fit_intercept = fit_intercept

    def __add_intercept(self, X):
        # Prepend a column of ones so the first weight acts as the bias term
        intercept = np.ones((X.shape[0], 1))
        return np.concatenate((intercept, X), axis=1)

    def __sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def __loss(self, h, y):
        # Binary cross-entropy loss
        return (-y * np.log(h) - (1 - y) * np.log(1 - h)).mean()

    def fit(self, X, y):
        if self.fit_intercept:
            X = self.__add_intercept(X)
        # Initialize the weights to zero
        self.theta = np.zeros((X.shape[1], 1))
        for i in range(self.num_iter):
            # Forward pass: predicted probabilities
            z = np.dot(X, self.theta)
            h = self.__sigmoid(z)
            # Gradient of the cross-entropy loss w.r.t. theta
            gradient = np.dot(X.T, (h - y)) / y.size
            # Gradient-descent update
            self.theta -= self.lr * gradient

    def predict_prob(self, X):
        if self.fit_intercept:
            X = self.__add_intercept(X)
        return self.__sigmoid(np.dot(X, self.theta))

    def predict(self, X, threshold=0.5):
        # Return hard 0/1 labels rather than booleans
        return (self.predict_prob(X) >= threshold).astype(int)
```
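Before training on the real data, a quick smoke test on a tiny, linearly separable toy set can confirm that the gradient-descent loop converges. This is a minimal sketch that assumes only the `LogisticRegression` class defined above; the toy data is made up for illustration:

```python
# Smoke test: four 1-D points, labels flip at x = 1.5
toy_X = np.array([[0.0], [1.0], [2.0], [3.0]])
toy_y = np.array([[0], [0], [1], [1]])

toy_model = LogisticRegression(lr=0.1, num_iter=5000)
toy_model.fit(toy_X, toy_y)

print(toy_model.predict_prob(toy_X).ravel())  # probabilities should increase with x
print(toy_model.predict(toy_X).ravel())       # expected: [0 0 1 1]
```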
Next, we train the model, predict on the test set, and compute the evaluation metrics. With TP, FN, FP, and TN denoting true positives, false negatives, false positives, and true negatives, accuracy = (TP + TN) / (TP + FN + FP + TN), precision = TP / (TP + FP), recall = TP / (TP + FN), and F1 = 2 · precision · recall / (precision + recall). The code is as follows:
```python
# Create the logistic regression model
model = LogisticRegression(lr=0.1, num_iter=100000)

# Train the model
model.fit(X_train, y_train)

# Predict labels for the test set
y_pred = model.predict(X_test)

# Confusion matrix, laid out as:
# [[TP, FN],
#  [FP, TN]]
# i.e. row 0 = actual positive (1), row 1 = actual negative (0)
confusion_matrix = np.zeros((2, 2))
for i in range(len(y_test)):
    if y_test[i] == 1 and y_pred[i] == 1:
        confusion_matrix[0][0] += 1   # true positive
    elif y_test[i] == 1 and y_pred[i] == 0:
        confusion_matrix[0][1] += 1   # false negative
    elif y_test[i] == 0 and y_pred[i] == 1:
        confusion_matrix[1][0] += 1   # false positive
    elif y_test[i] == 0 and y_pred[i] == 0:
        confusion_matrix[1][1] += 1   # true negative

# Accuracy: (TP + TN) / total
accuracy = (confusion_matrix[0][0] + confusion_matrix[1][1]) / np.sum(confusion_matrix)

# Precision: TP / (TP + FP)
precision = confusion_matrix[0][0] / (confusion_matrix[0][0] + confusion_matrix[1][0])

# Recall: TP / (TP + FN)
recall = confusion_matrix[0][0] / (confusion_matrix[0][0] + confusion_matrix[0][1])

# F1 score: harmonic mean of precision and recall
f1_score = 2 * precision * recall / (precision + recall)
```
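To inspect the results, you can print the confusion matrix and the derived metrics. The snippet below is just a formatting suggestion using the variables computed above:

```python
# Print the confusion matrix and the derived metrics
print("Confusion matrix ([[TP, FN], [FP, TN]]):")
print(confusion_matrix.astype(int))
print("Accuracy:  %.4f" % accuracy)
print("Precision: %.4f" % precision)
print("Recall:    %.4f" % recall)
print("F1 score:  %.4f" % f1_score)
```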
Putting the three code blocks above together, in order, gives the complete script.