贝叶斯实现手写体识别代码
时间: 2023-11-25 10:06:11 浏览: 76
这是一个基于朴素贝叶斯算法实现手写数字识别的Python代码示例:
```python
import numpy as np
import os
# 加载数据
def load_data(path):
    """Load text-encoded digit images and their labels from a directory.

    Each file name must begin with the integer class label followed by an
    underscore (the label is parsed as ``int(filename.split('_')[0])``).
    Every non-blank line of a file is one image row, and each character on
    that line is converted to an ``int`` pixel value.

    Args:
        path: Directory that contains the sample files.

    Returns:
        A tuple ``(X, y)`` of NumPy arrays. ``X`` stacks the per-file pixel
        grids and ``y`` holds the matching labels, both in sorted file-name
        order.

    Raises:
        ValueError: If a file name does not start with an integer label or
            a line contains a character that is not a digit.
    """
    X = []
    y = []
    # os.listdir() returns entries in arbitrary order; sort so that the
    # returned arrays are reproducible from run to run.
    for filename in sorted(os.listdir(path)):
        label = int(filename.split('_')[0])
        filepath = os.path.join(path, filename)
        with open(filepath, 'r') as f:
            stripped = (line.strip() for line in f)
            # Skip blank lines (e.g. a trailing empty line) so they do not
            # turn into empty rows and make the image ragged.
            rows = [list(map(int, text)) for text in stripped if text]
        X.append(rows)
        y.append(label)
    return np.array(X), np.array(y)
# 计算像素点的条件概率
def calc_pixel_prob(X, y):
    """Estimate per-class pixel probabilities with Laplace smoothing.

    For every class ``i`` and pixel position, the probability that the
    pixel is on is estimated as ``(on_count + 1) / (class_size + 2)``,
    where ``class_size`` is the number of training samples labelled ``i``.

    Args:
        X: Array of shape ``(n_samples, height, width)``. Pixel values are
            summed to obtain the "on" counts, so they are assumed to be
            0/1.
        y: Array of ``n_samples`` labels. Class ``i`` is selected with
            ``y == i``, so labels are assumed to be ``0 .. n_classes - 1``.

    Returns:
        Array of shape ``(n_classes, height, width, 2)`` where
        ``[..., 1]`` is the probability of the pixel being 1 and
        ``[..., 0]`` is its complement.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    _, height, width = X.shape
    n_classes = len(np.unique(y))
    pixel_prob = np.zeros((n_classes, height, width, 2))
    for i in range(n_classes):
        X_class = X[y == i]
        pixel_count = np.sum(X_class, axis=0)
        # The denominator is the number of samples in the class, not the
        # total number of "on" pixels across the class.
        class_size = X_class.shape[0]
        pixel_prob[i, :, :, 1] = (pixel_count + 1) / (class_size + 2)
        pixel_prob[i, :, :, 0] = 1 - pixel_prob[i, :, :, 1]
    return pixel_prob
# 计算先验概率
def calc_prior_prob(y):
    """Compute the prior probability of each class from the labels.

    Args:
        y: Sequence of integer labels. Class ``i`` is counted with
            ``y == i``, so labels are assumed to be ``0 .. n_classes - 1``.

    Returns:
        1-D float array of length ``n_classes`` (the number of distinct
        labels) whose entry ``i`` is the fraction of labels equal to ``i``.
    """
    y = np.asarray(y)
    n_classes = len(np.unique(y))
    total = len(y)
    return np.array(
        [np.count_nonzero(y == label) / total for label in range(n_classes)],
        dtype=float,
    )
# 预测单个样本
def predict_one(X, pixel_prob, prior_prob):
    """Classify a single image with the naive Bayes model.

    The score of class ``i`` is ``log(prior_prob[i])`` plus, for every
    pixel, the log of ``pixel_prob[i, row, col, 1]`` when the pixel equals
    1 and of ``pixel_prob[i, row, col, 0]`` otherwise.

    Args:
        X: 2-D array ``(height, width)`` of pixel values.
        pixel_prob: Array indexed as ``[class, row, col, value]`` with
            ``value`` 0 or 1.
        prior_prob: 1-D sequence of class priors; its length determines
            how many classes are scored.

    Returns:
        Index of the class with the highest log-posterior score.
    """
    X = np.asarray(X)
    height, width = X.shape
    n_classes = len(prior_prob)
    # Restrict the table to the classes and pixel positions actually scored.
    table = np.asarray(pixel_prob)[:n_classes, :height, :width]
    # log(0) legitimately yields -inf (an impossible class); silence the
    # divide-by-zero warning instead of alarming the caller.
    with np.errstate(divide="ignore"):
        log_prior = np.log(np.asarray(prior_prob, dtype=float))
        log_on = np.log(table[..., 1])
        log_off = np.log(table[..., 0])
    # The (height, width) mask broadcasts across the class axis.
    per_pixel = np.where(X == 1, log_on, log_off)
    log_prob = log_prior + per_pixel.sum(axis=(1, 2))
    return np.argmax(log_prob)
# 预测多个样本
def predict(X, pixel_prob, prior_prob):
    """Classify every image in a batch.

    Args:
        X: Array whose first axis enumerates the samples; each ``X[i]`` is
            passed to ``predict_one``.
        pixel_prob: Per-class pixel probability table, forwarded unchanged
            to ``predict_one``.
        prior_prob: Class priors, forwarded unchanged to ``predict_one``.

    Returns:
        NumPy array containing the predicted class index of each sample,
        in input order.
    """
    return np.array(
        [predict_one(sample, pixel_prob, prior_prob) for sample in X]
    )
# 加载训练数据和测试数据
train_path = 'mnist/train'
test_path = 'mnist/test'
X_train, y_train = load_data(train_path)
X_test, y_test = load_data(test_path)

# Fit the model: class priors and per-pixel conditional probabilities
prior_prob = calc_prior_prob(y_train)
pixel_prob = calc_pixel_prob(X_train, y_train)

# Classify the test set and report the fraction of correct predictions
y_pred = predict(X_test, pixel_prob, prior_prob)
n_correct = np.sum(y_pred == y_test)
accuracy = n_correct / len(y_test)
print('Accuracy:', accuracy)
```
在这段代码中,load_data函数用于加载手写数字图片数据,calc_pixel_prob函数用于计算像素点的条件概率,calc_prior_prob函数用于计算先验概率,predict_one函数用于预测单个样本,predict函数用于预测多个样本。在实现中,我们使用numpy库来方便地进行向量和矩阵的计算,使用os库来遍历数据目录并拼接文件路径。需要注意的是,代码假定数据目录中每个文件的文件名以"标签_"开头(例如 3_12.txt),文件内容是由0/1字符组成的若干文本行,每一行对应图像的一行像素。最后,我们使用测试数据对模型进行评估,计算出模型的准确率。
阅读全文