from gensim.models import CoherenceModel
from gensim.corpora import Dictionary
from gensim.models.ldamodel import LdaModel
from nltk.corpus import stopwords
# Load data: each document is an already-tokenized list of lowercase words.
data = [
    ["topic", "modeling", "is", "a", "useful", "technique"],
    ["it", "can", "help", "us", "to", "understand", "large", "text", "corpora"],
    ["there", "are", "many", "algorithms", "that", "can", "be", "used", "for",
     "topic", "modeling"],
]

# Create list of stopwords.
# NOTE: this needs the NLTK "stopwords" corpus to be available locally
# (e.g. installed once via nltk.download('stopwords')).
stop_words = stopwords.words('english')
# Set copy for O(1) membership tests; `stop_words` itself stays a list.
stop_word_set = set(stop_words)

# Remove stopwords BEFORE building the dictionary and corpus. CoherenceModel
# has no `stopwords` argument, so filtering has to happen on the tokens
# themselves; doing it here keeps the LDA vocabulary and the coherence
# texts consistent with each other.
texts = [[token for token in doc if token not in stop_word_set] for doc in data]

# Create dictionary and corpus from the filtered texts.
id2word = Dictionary(texts)
corpus = [id2word.doc2bow(text) for text in texts]

# Fixed seed so coherence scores are reproducible and comparable across
# the different topic counts.
RANDOM_SEED = 42

# Build LDA model with different number of topics and compute coherence score
for num_topics in range(2, 11):
    lda_model = LdaModel(
        corpus=corpus,
        id2word=id2word,
        num_topics=num_topics,
        passes=10,
        alpha='auto',
        eta='auto',
        random_state=RANDOM_SEED,
    )
    # processes=1 keeps the c_v probability estimation in this process:
    # the default spawns worker processes, which is unsafe in an unguarded
    # top-level script on spawn-based platforms and pointless for a corpus
    # this small.
    coherence_model_lda = CoherenceModel(
        model=lda_model,
        texts=texts,
        dictionary=id2word,
        coherence='c_v',
        processes=1,
    )
    coherence_lda = coherence_model_lda.get_coherence()
    print('Number of Topics =', num_topics, 'Coherence Score =', coherence_lda)