# Weak-password detection with a naive Bayes classifier in Python.
# Source page metadata -- time: 2024-05-25 11:02:39, views: 6
import math
import re
from collections import Counter

import numpy as np
def load_dataset(weak_path='weak_passwords.txt',
                 strong_path='strong_passwords.txt',
                 encoding=None):
    """Load the labelled password corpus from two plain-text files.

    Each file holds one password per line. Surrounding whitespace is
    stripped and lines that are empty after stripping are skipped.

    Args:
        weak_path: File of weak passwords; every entry is labelled 1.
        strong_path: File of non-weak passwords; every entry is labelled 0.
        encoding: Text encoding passed to ``open``. ``None`` keeps the
            platform default.

    Returns:
        A ``(dataset, labels)`` tuple of two parallel lists: the password
        strings (weak entries first, then non-weak) and their 0/1 labels.

    Raises:
        OSError: If either file cannot be opened.
    """
    dataset = []
    labels = []
    # Weak passwords are read first so the output order stays weak -> strong.
    for path, label in ((weak_path, 1), (strong_path, 0)):
        with open(path, 'r', encoding=encoding) as f:
            # Iterate the file object directly instead of readlines() so
            # the whole file is never held in memory as a list of raw lines.
            stripped = (line.strip() for line in f)
            passwords = [password for password in stripped if password]
        dataset.extend(passwords)
        labels.extend([label] * len(passwords))
    return dataset, labels
def text_to_vector(text):
    """Convert a text into a bag-of-words count mapping.

    The text is lower-cased and split into maximal runs of ``\\w``
    characters (letters, digits and underscore); every other character
    acts as a separator.

    Args:
        text: The string to tokenise.

    Returns:
        A plain ``dict`` mapping each token to the number of times it
        occurs, with keys in order of first appearance. Empty when the
        text contains no word characters.
    """
    tokens = re.findall(r'\w+', text.lower())
    # Convert back to a plain dict so a missing key still raises KeyError
    # instead of Counter's implicit 0.
    return dict(Counter(tokens))
def calculate_probabilities(dataset, labels, smoothing=1.0):
    """Estimate how each training token splits between the two classes.

    Every text is tokenised with ``text_to_vector`` and the token counts
    are accumulated per label. For each token the smoothed class shares
    are then computed as ``(count + smoothing) / (total + 2 * smoothing)``.

    Args:
        dataset: Sequence of training strings.
        labels: Sequence of labels parallel to ``dataset``; each label is
            used as a list index and must be 0 (non-weak) or 1 (weak).
        smoothing: Additive (Laplace) smoothing constant. With the default
            of 1.0 no share is ever exactly 0, so taking ``math.log`` of
            the result is always defined. Passing 0 yields the raw,
            unsmoothed shares, which can contain 0.0.

    Returns:
        A dict mapping each token to ``[share_label_0, share_label_1]``.
        Empty when ``dataset`` is empty.

    Raises:
        ValueError: If ``dataset`` and ``labels`` differ in length.
    """
    if len(dataset) != len(labels):
        raise ValueError(
            'dataset and labels must have the same length '
            '(got %d and %d)' % (len(dataset), len(labels))
        )

    # token -> [occurrences under label 0, occurrences under label 1]
    class_counts = {}
    for text, label in zip(dataset, labels):
        for word, count in text_to_vector(text).items():
            class_counts.setdefault(word, [0, 0])[label] += count

    probabilities = {}
    for word, (count_0, count_1) in class_counts.items():
        # Smoothing is added once per class, hence 2 * smoothing in the
        # denominator; the two shares still sum to 1.
        total = count_0 + count_1 + 2 * smoothing
        probabilities[word] = [
            (count_0 + smoothing) / total,
            (count_1 + smoothing) / total,
        ]
    return probabilities
def predict(text, probabilities, min_probability=1e-10):
    """Classify a text as a weak (1) or non-weak (0) password.

    The text is tokenised with ``text_to_vector``. Starting from equal
    priors of 0.5, a log-score is accumulated for each class over the
    distinct tokens of the text, and the class with the higher score wins.

    Args:
        text: The password to classify.
        probabilities: Mapping from token to a two-item sequence
            ``[p_non_weak, p_weak]``.
        min_probability: Lower bound applied to every probability before
            taking its logarithm, so that a value of 0.0 does not raise
            ``ValueError`` from ``math.log``.

    Returns:
        1 if the weak score is strictly greater than the non-weak score,
        otherwise 0 (ties therefore resolve to non-weak).
    """
    # Priors live in log space, like every other term of the sum.
    log_prior = math.log(0.5)
    score_weak = log_prior
    score_strong = log_prior

    # Tokens never seen in training are treated as equally likely under
    # both classes.
    unseen = (0.5, 0.5)

    # Each distinct token contributes once, regardless of how many times
    # it occurs in the text.
    for word in text_to_vector(text):
        p_strong, p_weak = probabilities.get(word, unseen)
        score_weak += math.log(max(p_weak, min_probability))
        score_strong += math.log(max(p_strong, min_probability))

    return 1 if score_weak > score_strong else 0
def accuracy(predicted_labels, true_labels):
    """Return the fraction of predictions that equal the true labels.

    Args:
        predicted_labels: Sequence of predicted labels.
        true_labels: Sequence of expected labels, parallel to
            ``predicted_labels``.

    Returns:
        A float between 0.0 and 1.0. For two empty sequences the result
        is 0.0 rather than a division by zero.

    Raises:
        ValueError: If the two sequences differ in length.
    """
    if len(predicted_labels) != len(true_labels):
        raise ValueError(
            'predicted_labels and true_labels must have the same length '
            '(got %d and %d)' % (len(predicted_labels), len(true_labels))
        )
    # len() rather than truthiness so NumPy arrays are accepted as well.
    if len(predicted_labels) == 0:
        return 0.0
    matches = np.asarray(predicted_labels) == np.asarray(true_labels)
    return float(np.mean(matches))
if __name__ == '__main__':
    # Train on the two password files in the current working directory.
    dataset, labels = load_dataset()
    probabilities = calculate_probabilities(dataset, labels)

    # Smoke-test the model on a few hand-labelled passwords.
    test_dataset = ['password123', 'qwertyuiop', 'iloveyou', '123456']
    # All four samples are trivially guessable, so each one is labelled
    # weak (1) -- including the plain digit sequence '123456'.
    test_labels = [1, 1, 1, 1]
    predicted_labels = [predict(text, probabilities) for text in test_dataset]

    print('预测标签:', predicted_labels)
    print('真实标签:', test_labels)
    print('准确率:', accuracy(predicted_labels, test_labels))