基于朴素贝叶斯算法的 MNIST 数据集读取与显示、数据集划分及算法实现的完整代码
时间: 2023-11-12 08:07:42 浏览: 82
好的,以下是用朴素贝叶斯算法实现MNIST数据集分类的完整代码:
```python
import numpy as np
import struct
import matplotlib.pyplot as plt
# Read the MNIST dataset
def load_mnist(path, kind='train'):
    """Load images and labels from a pair of raw (uncompressed) IDX files.

    The files ``<kind>-labels-idx1-ubyte`` and ``<kind>-images-idx3-ubyte``
    are read from the directory ``path``.

    Args:
        path: Directory that contains the two IDX files.
        kind: File-name prefix, e.g. ``'train'`` or ``'t10k'``.

    Returns:
        A tuple ``(images, labels)``. ``images`` is a uint8 array of shape
        ``(num_images, rows * cols)`` with one flattened image per row;
        ``labels`` is a uint8 array of shape ``(num_images,)``.

    Raises:
        ValueError: If a magic number is wrong, or the amount of data in a
            file does not match the counts declared in its header.
    """
    import os

    labels_path = os.path.join(path, kind + '-labels-idx1-ubyte')
    images_path = os.path.join(path, kind + '-images-idx3-ubyte')

    with open(labels_path, 'rb') as lbpath:
        # Header: two big-endian uint32 values (magic number, item count).
        magic, n = struct.unpack('>II', lbpath.read(8))
        if magic != 2049:
            raise ValueError(
                'bad magic number %d in label file %s' % (magic, labels_path))
        labels = np.fromfile(lbpath, dtype=np.uint8)
    if labels.size != n:
        raise ValueError(
            'label file %s declares %d labels but contains %d'
            % (labels_path, n, labels.size))

    with open(images_path, 'rb') as imgpath:
        # Header: four big-endian uint32 values (magic, count, rows, cols).
        magic, num, rows, cols = struct.unpack('>IIII', imgpath.read(16))
        if magic != 2051:
            raise ValueError(
                'bad magic number %d in image file %s' % (magic, images_path))
        images = np.fromfile(imgpath, dtype=np.uint8)
    if num != n:
        raise ValueError(
            'image count %d does not match label count %d' % (num, n))
    if images.size != num * rows * cols:
        raise ValueError(
            'image file %s declares %d x %d x %d pixels but contains %d'
            % (images_path, num, rows, cols, images.size))

    # Use the dimensions from the header instead of assuming 28 x 28 images.
    return images.reshape(num, rows * cols), labels
# Display a single image from the MNIST dataset
def show_image(image):
    """Display one square grayscale image with matplotlib.

    Args:
        image: Array-like holding the pixels of a single square image in any
            shape (for example a flat vector of 784 values for a 28 x 28
            digit). It is flattened and reshaped to ``side x side``.

    Raises:
        ValueError: If the number of pixels is not a perfect square.
    """
    pixels = np.asarray(image).reshape(-1)
    # Derive the side length from the data rather than hard-coding 28.
    side = int(round(pixels.size ** 0.5))
    if side * side != pixels.size:
        raise ValueError(
            'cannot display %d pixels as a square image' % pixels.size)
    plt.imshow(pixels.reshape(side, side), cmap='gray')
    plt.show()
# Split a dataset into a training part and a held-out part
def split_dataset(dataset, labels, test_ratio=0.2):
    """Randomly split samples and labels into training and held-out subsets.

    The shuffle uses NumPy's global random state, so call ``np.random.seed``
    beforehand if a reproducible split is needed.

    Args:
        dataset: Array whose first axis indexes samples.
        labels: Array with one label per sample, aligned with ``dataset``.
        test_ratio: Fraction of samples, in ``[0, 1]``, to hold out. The
            held-out count is ``int(num_samples * test_ratio)``.

    Returns:
        ``(train_dataset, train_labels, test_dataset, test_labels)``.

    Raises:
        ValueError: If ``test_ratio`` is outside ``[0, 1]`` or the number of
            labels differs from the number of samples.
    """
    # A negative or >1 ratio would otherwise slice the permutation in a
    # surprising way and silently return a wrong split.
    if not 0.0 <= test_ratio <= 1.0:
        raise ValueError(
            'test_ratio must be between 0 and 1, got %r' % (test_ratio,))
    num_samples = dataset.shape[0]
    if len(labels) != num_samples:
        raise ValueError(
            'dataset has %d samples but labels has %d entries'
            % (num_samples, len(labels)))

    num_test = int(num_samples * test_ratio)
    shuffled = np.random.permutation(num_samples)
    test_indices = shuffled[:num_test]
    train_indices = shuffled[num_test:]
    return (dataset[train_indices], labels[train_indices],
            dataset[test_indices], labels[test_indices])
# Compute the (unnormalised) multinomial likelihood
def multinomial_prob(x, theta):
    """Return the multinomial likelihood kernel ``prod_j theta_j ** x_j``.

    The multinomial coefficient is omitted, so the result is the likelihood
    up to a factor that does not depend on ``theta``.

    Args:
        x: Feature counts; must broadcast against ``theta`` to a 2-D array.
        theta: Per-feature probabilities, one row per distribution.

    Returns:
        1-D array with one likelihood value per row of the broadcast product.

    Note:
        The sum is exponentiated, so long feature vectors with large counts
        underflow to 0.0; compare log-likelihoods in that case.
    """
    # The previous version also added log(1 - theta.sum(axis=1)). That term is
    # not part of the multinomial likelihood, and for rows that sum to 1 it is
    # log(0), which forced every result to 0 (or NaN after rounding).
    return np.exp(np.sum(x * np.log(theta), axis=1))
# Train the naive Bayes model
def train_naive_bayes(X, y):
    """Estimate class priors and multinomial feature probabilities.

    Args:
        X: 2-D array of non-negative feature values, shape
            ``(num_samples, num_features)``.
        y: 1-D array of integer class labels; labels are assumed to be
            non-negative so that they can be used as row indices.

    Returns:
        ``(priors, theta)``. ``priors[c]`` is the fraction of samples with
        label ``c``. ``theta[c]`` holds the add-one smoothed feature
        probabilities of class ``c``; each row sums to 1.
    """
    num_samples, num_features = X.shape
    y = np.asarray(y)
    # Size the model by the largest label, not the number of distinct labels,
    # so that row c always describes label c even when some label is absent
    # from y. Absent labels get prior 0 and a uniform theta row.
    num_classes = int(y.max()) + 1 if y.size else 0
    priors = np.zeros(num_classes)
    theta = np.zeros((num_classes, num_features))
    for label in range(num_classes):
        class_rows = X[y == label]
        priors[label] = len(class_rows) / float(num_samples)
        # Accumulate in float64 so small integer dtypes cannot overflow.
        feature_counts = class_rows.sum(axis=0, dtype=np.float64)
        # Add-one smoothing keeps every probability strictly positive.
        theta[label] = (feature_counts + 1) / (feature_counts.sum() + num_features)
    return priors, theta
# Predict with the naive Bayes model
def predict_naive_bayes(X, priors, theta):
    """Predict the most probable class index for every sample.

    The score of class ``c`` for a sample ``x`` is
    ``log(priors[c]) + sum_j x_j * log(theta[c, j])``.

    Args:
        X: 2-D array of feature values, shape ``(num_samples, num_features)``.
        priors: 1-D array-like of class prior probabilities.
        theta: 2-D array-like of per-class feature probabilities, shape
            ``(num_classes, num_features)``.

    Returns:
        1-D integer array with the index of the highest-scoring class for
        each sample.
    """
    X = np.asarray(X)
    priors = np.asarray(priors, dtype=np.float64)
    num_classes = len(priors)
    log_theta = np.log(np.asarray(theta, dtype=np.float64)[:num_classes])
    # A class with prior 0 gets a score of -inf and can never win; silence
    # only the divide-by-zero warning that log(0) would otherwise emit.
    with np.errstate(divide='ignore'):
        log_priors = np.log(priors)
    # One matrix product scores every class at once instead of looping over
    # classes and building a (num_samples, num_features) temporary for each.
    log_probs = X @ log_theta.T + log_priors
    return np.argmax(log_probs, axis=1)
# Load the MNIST training set and the official test set
X_train, y_train = load_mnist('data', kind='train')
X_test, y_test = load_mnist('data', kind='t10k')
# Show one sample image
show_image(X_train[0])
# Hold out part of the training data for validation. The hold-out gets its
# own names so the official test set loaded above is no longer overwritten.
X_train, y_train, X_val, y_val = split_dataset(X_train, y_train, test_ratio=0.2)
# Train the naive Bayes model
priors, theta = train_naive_bayes(X_train, y_train)
# Predict on the held-out split
y_pred = predict_naive_bayes(X_val, priors, theta)
# Accuracy on the held-out split (same quantity the script always printed)
accuracy = np.mean(y_pred == y_val) * 100
print('Accuracy: %.2f%%' % accuracy)
# Also score the official test set, which was previously loaded but unused
y_test_pred = predict_naive_bayes(X_test, priors, theta)
test_accuracy = np.mean(y_test_pred == y_test) * 100
print('Test accuracy: %.2f%%' % test_accuracy)
```
这里采用多项式分布作为各类别下特征的条件概率分布(并使用加一平滑),实现了朴素贝叶斯模型的训练和预测,并计算了预测精度。
阅读全文