首页 > 困惑度计算主题数量代码

困惑度计算主题数量代码

时间: 2023-08-06 10:24:58 浏览: 111

以下是在 Python 中使用 gensim 针对不同主题数量训练 LDA 模型并计算评估指标，从而选择合适主题数量的代码示例：

from gensim.models import CoherenceModel
from gensim.corpora import Dictionary
from gensim.models.ldamodel import LdaModel
from nltk.corpus import stopwords

# Load data: each document is already tokenized into a list of lowercase words.
data = [
    ["topic", "modeling", "is", "a", "useful", "technique"],
    ["it", "can", "help", "us", "to", "understand", "large", "text", "corpora"],
    ["there", "are", "many", "algorithms", "that", "can", "be", "used", "for", "topic", "modeling"],
]

# Build the stop-word set first so it can be applied before the dictionary is
# created. A set gives O(1) membership tests during filtering.
# NOTE: this needs the NLTK "stopwords" corpus to be installed locally
# (typically via nltk.download('stopwords')).
stop_words = set(stopwords.words('english'))

# CoherenceModel does not accept a `stopwords` keyword, so stop words have to
# be removed from the tokenized documents up front instead.
texts = [[token for token in doc if token not in stop_words] for doc in data]

# Create dictionary and bag-of-words corpus from the filtered documents so the
# model, the corpus and the coherence computation all share one vocabulary.
id2word = Dictionary(texts)
corpus = [id2word.doc2bow(text) for text in texts]

# Build an LDA model for each candidate number of topics and report both the
# coherence score (higher is better) and the perplexity (lower is better).
for num_topics in range(2, 11):
    lda_model = LdaModel(corpus=corpus, id2word=id2word, num_topics=num_topics, passes=10, alpha='auto', eta='auto')

    # log_perplexity returns the per-word likelihood bound; gensim defines
    # perplexity as 2 ** (-bound).
    perplexity = 2 ** (-lda_model.log_perplexity(corpus))

    coherence_model_lda = CoherenceModel(model=lda_model, texts=texts, dictionary=id2word, coherence='c_v')
    coherence_lda = coherence_model_lda.get_coherence()

    print('Number of Topics =', num_topics, 'Coherence Score =', coherence_lda, 'Perplexity =', perplexity)