Python_LDA实现方法详解_LDA模型

python

python算法

70 浏览量更新于2023-03-03 评论 3 收藏 68KB PDF 举报

身份认证购VIP最低享 7 折!

领优惠券(最高得80元）

资源详情

资源评论

资源推荐

Python_LDA实现方法详解

LDA（Latent Dirichlet Allocation）模型是一种常用且用途广泛的概率主题模型，其实现一般通过变分推断（Variational Inference）或吉布斯采样（Gibbs Sampling）完成。作者在提出LDA模型时给出了其变分推理的C源码（后续贴出C++改编的类），这里贴出基于Python第三方模块 lda 改写的LDA类及实现。

#coding:utf-8

import numpy as np

import lda

import lda.datasets

import jieba

import codecs

class LDA_v20161130():
    """Topic-modelling helper that keeps a corpus, its vocabulary and a model.

    The instance holds the number of topics, the vocabulary, the
    document-term count matrix, the stop-word list and the fitted model.
    """

    def __init__(self, topics=2):
        """Create an empty, unfitted instance.

        Args:
            topics: Number of topics, stored as ``self.n_topic``.
        """
        self.n_topic = topics
        self.corpus = None
        self.vocab = None
        self.ppCountMatrix = None
        # Default tokens to drop after segmentation: mostly full-width
        # Chinese punctuation, plus the space and the empty string.
        self.stop_words = [
            u'，', u'。', u'、', u'（', u'）', u'·',
            u'！', u' ', u'：', u'“', u'”', u'',
        ]
        self.model = None

def loadCorpusFromFile(self, fn, encoding='utf-8', tokenizer=None):
    """Read a one-document-per-line corpus and build its term-count matrix.

    Every line is stripped, segmented with ``tokenizer`` and filtered
    against ``self.stop_words``. The results are stored on the instance:

    * ``self.vocab`` -- list of distinct tokens in order of first appearance.
    * ``self.ppCountMatrix`` -- integer array of shape
      ``(number of lines, len(self.vocab))`` holding per-line token counts.

    Blank lines are kept as all-zero rows, so row ``i`` always corresponds
    to line ``i`` of the file.

    Args:
        fn: Path of the corpus file.
        encoding: Text encoding used to decode the file.
        tokenizer: Callable taking one line of text and returning an
            iterable of tokens. Defaults to ``jieba.cut``.
    """
    # io.open decodes text the same way on Python 2 and Python 3.
    import io

    if tokenizer is None:
        tokenizer = jieba.cut
    # Hoisted so each token costs one hash lookup instead of a list scan.
    stop_words = set(self.stop_words)

    # Segment each line exactly once; the vocabulary and the counts are
    # both derived from these same token lists.
    with io.open(fn, 'r', encoding=encoding) as f:
        documents = [
            [token for token in tokenizer(line.strip())
             if token not in stop_words]
            for line in f
        ]

    # Assign column indices in order of first appearance.
    vocab = []
    column_of = {}
    for tokens in documents:
        for token in tokens:
            if token not in column_of:
                column_of[token] = len(vocab)
                vocab.append(token)
    self.vocab = vocab

    # Plain ``int`` replaces the ``np.int`` alias removed from NumPy.
    counts = np.zeros((len(documents), len(vocab)), dtype=int)
    for row, tokens in enumerate(documents):
        for token in tokens:
            counts[row, column_of[token]] += 1
    self.ppCountMatrix = counts

    print("load corpus from %s success!" % fn)

def setStopWords(self, word_list):
    """Replace the stop-word collection stored on the instance.

    Args:
        word_list: Collection of tokens to treat as stop words. It is
            stored as given; no copy is made.
    """
    self.stop_words = word_list

def fitModel(self, n_iter=1500, _alpha=0.1, _eta=0.01):
    """Create an ``lda.LDA`` model and fit it on ``self.ppCountMatrix``.

    The fitted model is stored in ``self.model``.

    Args:
        n_iter: Forwarded to ``lda.LDA`` as ``n_iter``.
        _alpha: Forwarded to ``lda.LDA`` as ``alpha``.
        _eta: Forwarded to ``lda.LDA`` as ``eta``.

    Raises:
        ValueError: If no count matrix has been loaded yet.
    """
    # Fail early with a clear message instead of an opaque error from lda.
    if self.ppCountMatrix is None:
        raise ValueError("no corpus loaded; call loadCorpusFromFile() first")
    # random_state is fixed at 1, as in the original code.
    self.model = lda.LDA(
        n_topics=self.n_topic,
        n_iter=n_iter,
        alpha=_alpha,
        eta=_eta,
        random_state=1,
    )
    self.model.fit(self.ppCountMatrix)

def printTopic_Word(self, n_top_word=8):
    """Print the highest-weighted vocabulary words of each topic.

    One line is printed per row of ``self.model.topic_word_``, in the form
    ``Topic: <index>   <word> <word> ...`` with the words ordered from
    highest to lowest weight.

    Args:
        n_top_word: Number of words to print per topic.
    """
    # Built once; the vocabulary does not change between topics.
    vocab = np.array(self.vocab)
    for i, topic_dist in enumerate(self.model.topic_word_):
        # argsort is ascending, so the reversed slice walks back from the
        # largest weight and stops after n_top_word entries.
        topic_words = vocab[np.argsort(topic_dist)][:-(n_top_word + 1):-1]
        print("Topic: %d   %s" % (i, " ".join(topic_words)))

def printDoc_Topic(self):
    """Print the top topic and the topic distribution of each document.

    One line is printed per row of ``self.model.doc_topic_``, showing the
    row index, the index of the largest entry and the row itself.
    """
    # Iterate the fitted rows directly rather than indexing by the length
    # of the count matrix; these are the rows being reported.
    for i, topic_dist in enumerate(self.model.doc_topic_):
        print("Doc %d:((top topic:%s) topic distribution:%s)"
              % (i, topic_dist.argmax(), topic_dist))

def printVocabulary(self):
    """Print a ``vocabulary:`` header followed by all words on one line.

    The words of ``self.vocab`` are separated by single spaces.
    """
    print("vocabulary:")
    # A single-argument print call behaves the same on Python 2 and 3,
    # unlike the trailing-comma print statement it replaces.
    print(" ".join(self.vocab))

def saveVocabulary(self, fn):
    """Write the vocabulary to a UTF-8 text file, one word per line.

    Args:
        fn: Path of the output file; an existing file is overwritten.
    """
    # The context manager closes the file even if a write fails.
    with codecs.open(fn, 'w', 'utf-8') as f:
        for word in self.vocab:
            # The newline keeps the words separable when read back;
            # without it the file is one run-together string.
            f.write("%s\n" % word)

def saveTopic_Words(self, fn, n_top_word = -1):

if n_top_word==-1:

n_top_word = len(self.vocab)

f = codecs.open(fn, 'w', 'utf-8')

本内容试读结束，登录后可阅读更多

下载后可阅读完整内容，剩余2页未读，立即下载

weixin_38747946

粉丝: 9
资源: 943

会员权益专享

Python_LDA实现方法详解

评论0

会员权益专享

最新资源

Python_LDA实现方法详解

评论0

LDA算法(MATLAB实现)

LDA资料（文章+源代码）

Mallet教程

python X_train_lda = lda.fit_transform(X_train, y_train)代码如何理解

pos_lda=models.LdaModel(pos_corpus,num_topics=2,id2word=pos_dict) neg_lda=models.LdaModel(neg_corpus,num_topics=2,id2word=neg_dict) pos_lda.print_topics(num_topics=10) neg_lda.print_topics(num_topics=10)

python实现LDA降维

用python实现LDA降维

ModuleNotFoundError: No module named ‘lda._lda

python利用LDA实现mnist数据集的可视化

distinctiveness_lda.append(lda_model.distinctiveness())报错'LdaModel' object has no attribute 'distinctiveness'

线性判别分析LDA的python代码实现

python实现LDA代码

No module named 'lda._lda'

python实现LDA降维，不使用sklearn

举一个例子，python，训练LDA主题模型，评价不同主题数的模型的困惑度来选择最优的那个模型

python实现LDA线性判别分析

python LDA降维后回归

lda降维python代码

python代码实现：LDA算法进行人脸识别

PythonLDA关键词提取

会员权益专享

最新资源