用python和tf-idf算法提取评论关键词的代码
时间: 2024-05-19 11:17:40 浏览: 125
以下是使用Python和TF-IDF算法提取评论关键词的代码:
首先需要安装scikit-learn和jieba库,使用pip install scikit-learn 和 pip install jieba进行安装(PyPI上的包名是scikit-learn,代码中导入时仍写作sklearn)。
import jieba
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer
# Extract the top keywords from a file of comments using TF-IDF.
#
# Pipeline: read one comment per line from comments.txt, segment each
# comment with jieba, build a term-count matrix, re-weight it with TF-IDF,
# sum every term's TF-IDF weight over all comments, and print the
# highest-scoring terms.
import numpy as np

TOP_K = 10  # maximum number of keywords to print

# Read the comments (one per line), skipping blank lines so that they do
# not become empty documents in the corpus.
corpus = []  # list of raw comment strings
with open('comments.txt', 'r', encoding='utf-8') as f:
    for line in f:
        line = line.strip()
        if line:
            corpus.append(line)

# Segment each comment with jieba and re-join the tokens with spaces,
# because CountVectorizer splits its input on word boundaries.
corpus_list = [" ".join(jieba.lcut(line)) for line in corpus]

# Build the term-count matrix.
# NOTE(review): CountVectorizer's default token_pattern only keeps tokens of
# two or more word characters, so single-character words are dropped here.
# Pass token_pattern=r"(?u)\b\w+\b" if those should be counted as well.
count_vect = CountVectorizer()
X_train_counts = count_vect.fit_transform(corpus_list)

# Convert raw counts to TF-IDF weights.
tfidf_transformer = TfidfTransformer()
X_train_tfidf = tfidf_transformer.fit_transform(X_train_counts)

# Vocabulary terms in column order. get_feature_names() was removed in
# scikit-learn 1.2; fall back to it only on releases that predate
# get_feature_names_out().
if hasattr(count_vect, "get_feature_names_out"):
    feature_names = count_vect.get_feature_names_out()
else:
    feature_names = count_vect.get_feature_names()

# Sum each term's TF-IDF weight over all comments. Summing the sparse
# matrix directly avoids materialising a dense (documents x terms) array.
scores = np.asarray(X_train_tfidf.sum(axis=0)).ravel()
keywords = list(zip(feature_names, scores))

# Sort by total TF-IDF weight, highest first, and print the top keywords.
# Slicing (rather than indexing 0..9) is safe when there are fewer than
# TOP_K distinct terms.
keywords.sort(key=lambda x: x[1], reverse=True)
for word, _score in keywords[:TOP_K]:
    print(word)
阅读全文
相关推荐















