"""Naive Bayes classifier with Laplace (add-one) smoothing on a toy dataset."""
from collections import Counter

import numpy as np


def _key(label, feature_index, value):
    """Build the lookup key 'label,feature_index,value' used by the model."""
    return ','.join((str(label), str(feature_index), str(value)))


def loaddata():
    """Return the toy training set.

    Returns:
        A tuple ``(X, y)``: ``X`` holds 15 rows of two categorical features
        and ``y`` holds the 15 matching class labels (``1`` or ``-1``).
    """
    # NumPy coerces the mixed int/str rows to strings, so the first feature
    # is handled through its text form ('1', '2', '3') everywhere below.
    X = np.array([[1, 'S'], [1, 'M'], [1, 'M'], [1, 'S'], [1, 'S'],
                  [2, 'S'], [2, 'M'], [2, 'M'], [2, 'L'], [2, 'L'],
                  [3, 'L'], [3, 'M'], [3, 'M'], [3, 'L'], [3, 'L']])
    y = np.array([-1, -1, 1, 1, -1, -1, -1, 1, 1, 1, 1, 1, 1, 1, -1])
    return X, y


def Train(trainset, train_labels):
    """Estimate Laplace-smoothed prior and conditional probabilities.

    Args:
        trainset: 2-D array-like, one row per sample and one column per
            feature.
        train_labels: 1-D array-like with one class label per row.

    Returns:
        A tuple ``(prior_probability, conditional_probability_final, labels)``:
        ``prior_probability`` maps each label to ``(N_c + 1) / (m + K)``;
        ``conditional_probability_final`` maps the key
        ``'label,feature_index,value'`` to ``(N_cjv + 1) / (N_c + S_j)``;
        ``labels`` is the set of distinct class labels.
        Here ``m`` is the number of samples, ``K`` the number of classes,
        ``N_c`` the number of samples in class ``c``, ``N_cjv`` the number of
        class-``c`` samples whose feature ``j`` equals ``v``, and ``S_j`` the
        number of distinct values of feature ``j``.
    """
    trainset = np.asarray(trainset)
    train_labels = np.asarray(train_labels)
    m, n = trainset.shape

    # Possible class values.
    labels = set(train_labels)
    class_counts = {
        label: int(np.count_nonzero(train_labels == label))
        for label in labels
    }

    # Raw co-occurrence counts, keyed by 'label,feature_index,value'.
    pair_counts = Counter(
        _key(train_labels[i], j, trainset[i][j])
        for i in range(m)
        for j in range(n)
    )

    conditional_probability_final = {}
    for j in range(n):
        values = {str(value) for value in trainset[:, j]}
        for label in labels:
            # The denominator uses the raw class count: adding the smoothed
            # prior numerator here would make the probabilities sum to < 1.
            denominator = class_counts[label] + len(values)
            for value in values:
                key = _key(label, j, value)
                # Counter yields 0 for pairs never seen together, so every
                # known value gets a non-zero probability under every class.
                conditional_probability_final[key] = (
                    (pair_counts[key] + 1) / denominator
                )

    prior_probability = {
        label: (class_counts[label] + 1) / (m + len(labels))
        for label in labels
    }
    return prior_probability, conditional_probability_final, labels


def predict(data):
    """Return the most probable class label for one sample.

    Reads the module-level ``prior_probability``, ``conditional_probability``
    and ``train_labels_set`` produced by ``Train``.

    Args:
        data: sequence of feature values, in training-column order.

    Returns:
        The label with the highest score (prior times the conditional
        probability of each feature value).

    Raises:
        KeyError: if a feature value does not occur in the training data.
    """
    result = {}
    for label in train_labels_set:
        score = 1.0 * prior_probability[label]
        for i, value in enumerate(data):
            key = _key(label, i, value)
            try:
                score *= conditional_probability[key]
            except KeyError:
                raise KeyError(
                    'feature {} has value {!r}, which does not occur in the '
                    'training data'.format(i, value)
                ) from None
        result[label] = score
    print('result=', result)
    # Pick the label with the largest score (first one wins on ties).
    return max(result, key=result.get)


X, y = loaddata()
prior_probability, conditional_probability, train_labels_set = Train(X, y)
r_label = predict([2, 'S'])
print(' r_label =', r_label)
# Request from the original post: run this Python code.
时间: 2024-01-23 07:03:45 浏览: 69
Python3X np.load.txt
这段代码的功能是实现朴素贝叶斯算法进行二分类(标签值为-1和1),对给定的测试数据进行分类预测。其中loaddata()函数载入训练数据集X和标签集y,Train()函数训练得到先验概率prior_probability和条件概率conditional_probability_final,predict()函数根据输入的测试数据进行分类预测。
具体来说,Train()函数计算了先验概率和条件概率(均采用加一的拉普拉斯平滑):先验概率是指某个类别在全部样本中出现的概率,条件概率是指在给定类别的情况下某个特征取某个值的概率。predict()函数根据输入的测试数据,为每个类别计算得分(先验概率乘以各特征取值的条件概率),并返回得分最高的类别。
该代码需要先调用loaddata()函数载入训练数据,然后调用Train()函数进行训练得到先验概率prior_probability和条件概率conditional_probability_final,最后调用predict()函数进行分类预测。
阅读全文