用 Python 写基于词袋模型的场景识别
时间: 2024-05-22 19:15:51 浏览: 73
基于词袋模型的场景识别配套代码Scene_Recognition_with_Bag_of_Words
5星 · 资源好评率100%
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
# Category names mapped to the integer class ids used as training labels.
categories = dict(sports=0, finance=1, technology=2)

# Labelled training corpus: each entry is a (sentence, class id) pair.
train_data = [
    ('I love playing football', categories['sports']),
    ('I am a loyal fan of Messi', categories['sports']),
    ('Stock market is booming', categories['finance']),
    ('Investing in a good stock can bring you wealth', categories['finance']),
    ('New iPhone X has amazing features', categories['technology']),
    ('Artificial intelligence is the future', categories['technology']),
]

# Unlabelled sentences to classify once the model has been trained.
test_data = [
    'Football is my passion',
    'I am thinking of investing in stock market',
    'I want to buy the new iPhone X',
    'AI will change the world',
]
# Split the labelled pairs into parallel lists of sentences and class ids.
train_texts = [text for text, _ in train_data]
train_labels = [label for _, label in train_data]

# Bag-of-words model: learn the vocabulary from the training sentences and
# encode them as word-count vectors in a single step.
vectorizer = CountVectorizer()
train_vectors = vectorizer.fit_transform(train_texts)

# Train a multinomial naive Bayes classifier on the count vectors.
clf = MultinomialNB()
clf.fit(train_vectors, train_labels)

# Encode the test sentences with the vocabulary learned above (transform
# only, no re-fitting) and predict a class id for each of them.
test_vectors = vectorizer.transform(test_data)
predicted_labels = clf.predict(test_vectors)
# Print each test sentence next to the name of its predicted category.
# Invert the name -> id mapping once so each prediction is a direct lookup
# instead of a scan over every category (the ids in `categories` are unique,
# so the inversion loses nothing).
label_to_category = {label: category for category, label in categories.items()}
for text, predicted_label in zip(test_data, predicted_labels):
    category = label_to_category.get(predicted_label)
    # A predicted id with no matching category name is skipped silently.
    if category is not None:
        print('{} : {}'.format(text, category))
阅读全文