Writing a Logistic Regression Algorithm in NumPy for Multi-class Classification of the Iris Dataset, with Visualization
First, we need to load the dataset and split it into training and test sets. Here I use the iris dataset from the sklearn library.
```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
iris = load_iris()
X = iris.data    # four features: sepal/petal length and width
y = iris.target  # three classes: setosa, versicolor, virginica
# 80/20 train/test split with a fixed seed for reproducibility
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
```
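Optionally, standardizing the features often helps plain gradient descent converge. Here is a minimal sketch (my addition, not required by the rest of the code), using statistics computed on the training set only so nothing leaks from the test set:
```python
# Optional: standardize features with training-set statistics only.
mean = X_train.mean(axis=0)
std = X_train.std(axis=0)
X_train = (X_train - mean) / std
X_test = (X_test - mean) / std
```
If this step is applied, the later plots are in standardized units rather than centimeters.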
Next, we implement the logistic regression algorithm itself. For a multi-class problem, the softmax function converts the raw scores (logits) into per-class probabilities, and the cross-entropy loss measures how well the predictions match the labels.
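For reference, given logits $z \in \mathbb{R}^K$ for $K$ classes and $m$ examples with true labels $y_i$, the two functions are

$$\mathrm{softmax}(z)_k = \frac{e^{z_k}}{\sum_{j=1}^{K} e^{z_j}}, \qquad L = -\frac{1}{m}\sum_{i=1}^{m} \log p_{i,\,y_i},$$

where $p_{i,y_i}$ is the predicted probability of example $i$'s true class. A NumPy implementation follows.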
```python
def softmax(x):
    # Subtract the row-wise max before exponentiating for numerical stability.
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

def cross_entropy_loss(logits, y_true):
    # Mean negative log-likelihood of the true classes.
    m = y_true.shape[0]
    p = softmax(logits)
    log_likelihood = -np.log(p[np.arange(m), y_true])
    return np.sum(log_likelihood) / m

def logistic_regression(X, y, num_iterations, learning_rate):
    m, n = X.shape
    num_classes = len(np.unique(y))
    # Small random weights, zero biases.
    W = np.random.randn(n, num_classes) * 0.01
    b = np.zeros((1, num_classes))
    # One-hot encode the labels once, outside the loop.
    y_one_hot = np.eye(num_classes)[y]
    for i in range(num_iterations):
        z = np.dot(X, W) + b    # logits, shape (m, num_classes)
        y_pred = softmax(z)     # class probabilities
        loss = cross_entropy_loss(z, y)
        # Gradient of the mean cross-entropy with respect to the logits.
        dz = (y_pred - y_one_hot) / m
        dW = np.dot(X.T, dz)
        db = np.sum(dz, axis=0, keepdims=True)
        W -= learning_rate * dW
        b -= learning_rate * db
        if i % 100 == 0:
            print("Iteration {}: loss = {}".format(i, loss))
    return W, b
```
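As a quick sanity check of these two functions (the numbers below are purely illustrative), each row of softmax's output should sum to 1, and the loss should equal the mean negative log-probability of the true class:
```python
# Sanity check on toy values: one example with 3 classes.
z_toy = np.array([[2.0, 1.0, 0.1]])  # logits
y_toy = np.array([0])                # true class index

p = softmax(z_toy)
print(p, p.sum(axis=1))                  # probabilities; the row sums to 1
print(cross_entropy_loss(z_toy, y_toy))  # -log(p[0, 0]) ≈ 0.417
```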
Finally, we use the trained model to make predictions and visualize the results with matplotlib. Note that the plot below simply colors each test point by its predicted class over the first two features (sepal length and width); a sketch of a true decision-boundary plot follows the complete code at the end.
```python
import matplotlib.pyplot as plt
W, b = logistic_regression(X_train, y_train, num_iterations=1000, learning_rate=0.1)  # lr scaled up since the gradient is averaged over m
def predict(X, W, b):
    # Pick the class with the highest predicted probability.
    z = np.dot(X, W) + b
    return np.argmax(softmax(z), axis=1)
y_pred = predict(X_test, W, b)
# Scatter the test points over the first two features, colored by predicted class.
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_pred)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.show()
```
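It is also worth reporting a number alongside the plot; a one-line accuracy check on the held-out test set:
```python
# Fraction of test samples whose predicted class matches the true label.
accuracy = np.mean(y_pred == y_test)
print("Test accuracy: {:.2%}".format(accuracy))
```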
The complete code is as follows:
```python
import numpy as np
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
def softmax(x):
    # Subtract the row-wise max before exponentiating for numerical stability.
    exp_x = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)

def cross_entropy_loss(logits, y_true):
    # Mean negative log-likelihood of the true classes.
    m = y_true.shape[0]
    p = softmax(logits)
    log_likelihood = -np.log(p[np.arange(m), y_true])
    return np.sum(log_likelihood) / m

def logistic_regression(X, y, num_iterations, learning_rate):
    m, n = X.shape
    num_classes = len(np.unique(y))
    # Small random weights, zero biases.
    W = np.random.randn(n, num_classes) * 0.01
    b = np.zeros((1, num_classes))
    # One-hot encode the labels once, outside the loop.
    y_one_hot = np.eye(num_classes)[y]
    for i in range(num_iterations):
        z = np.dot(X, W) + b    # logits, shape (m, num_classes)
        y_pred = softmax(z)     # class probabilities
        loss = cross_entropy_loss(z, y)
        # Gradient of the mean cross-entropy with respect to the logits.
        dz = (y_pred - y_one_hot) / m
        dW = np.dot(X.T, dz)
        db = np.sum(dz, axis=0, keepdims=True)
        W -= learning_rate * dW
        b -= learning_rate * db
        if i % 100 == 0:
            print("Iteration {}: loss = {}".format(i, loss))
    return W, b

def predict(X, W, b):
    # Pick the class with the highest predicted probability.
    z = np.dot(X, W) + b
    return np.argmax(softmax(z), axis=1)
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
W, b = logistic_regression(X_train, y_train, num_iterations=1000, learning_rate=0.1)  # lr scaled up since the gradient is averaged over m
y_pred = predict(X_test, W, b)
plt.scatter(X_test[:, 0], X_test[:, 1], c=y_pred)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.show()
```
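Since the model above is trained on all four features, its decision boundary cannot be drawn directly in the sepal plane. As a sketch of an actual decision-boundary plot (my extension, not part of the code above), one option is to retrain on just the first two features and evaluate the model on a grid:
```python
# Sketch: retrain on sepal length/width only, then color the plane
# by predicted class to show the decision regions.
X2_train = X_train[:, :2]
W2, b2 = logistic_regression(X2_train, y_train, num_iterations=1000, learning_rate=0.1)

# Build a grid covering the training data with a small margin.
x_min, x_max = X2_train[:, 0].min() - 0.5, X2_train[:, 0].max() + 0.5
y_min, y_max = X2_train[:, 1].min() - 0.5, X2_train[:, 1].max() + 0.5
xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200),
                     np.linspace(y_min, y_max, 200))
grid = np.c_[xx.ravel(), yy.ravel()]

# Predict a class for every grid point and shade the regions.
Z = predict(grid, W2, b2).reshape(xx.shape)
plt.contourf(xx, yy, Z, alpha=0.3)
plt.scatter(X2_train[:, 0], X2_train[:, 1], c=y_train)
plt.xlabel('Sepal length')
plt.ylabel('Sepal width')
plt.show()
```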