```
import pandas as pd
from openpyxl import Workbook

# 获取主题下词语的概率分布
def get_topic_word_distribution(lda, tf_feature_names):
    arr = lda.transform(tf_vectorizer.transform([' '.join(tf_feature_names)]))
    return arr[0]

# 打印主题下词语的概率分布
def print_topic_word_distribution(lda, tf_feature_names, n_top_words,n_topics):
    dist = get_topic_word_distribution(lda, tf_feature_names,n_topics)
    for i in range(n_topics):
        print("Topic {}: {}".format(i, ', '.join("{:.4f}".format(x) for x in dist[i])))

# 输出每个主题下词语的概率分布至Excel表格
def output_topic_word_distribution_to_excel(lda, tf_feature_names, n_top_words, n_topics,filename):
    # 创建Excel工作簿和工作表
    wb = Workbook()
    ws = wb.active
    ws.title = "Topic Word Distribution"
    # 添加表头
    ws.cell(row=1, column=1).value = "Topic"
    for j in range(n_top_words):
        ws.cell(row=1, column=j+2).value = tf_feature_names[j]
    # 添加每个主题下词语的概率分布
    dist = get_topic_word_distribution(lda, tf_feature_names, n_topics)
    for i in range(n_topics):
        ws.cell(row=i+2, column=1).value = i
        for j in range(n_top_words):
            ws.cell(row=i+2, column=j+2).value = dist[i][j]
    # 保存Excel文件
    wb.save(filename)

n_top_words = 30
tf_feature_names = tf_vectorizer.get_feature_names()
topic_word = print_topic_word_distribution(lda, tf_feature_names, n_top_words, n_topics)
#print_topic_word_distribution(lda, tf_feature_names, n_top_words)
output_topic_word_distribution_to_excel(lda, tf_feature_names, n_top_words, n_topics, "topic_word_distribution.xlsx")
```
报错:
```
Traceback (most recent call last):
  File "D:\python\lda3\data_1.py", line 157, in <module>
    topic_word = print_topic_word_distribution(lda, tf_feature_names, n_top_words, n_topics)
  File "D:\python\lda3\data_1.py", line 128, in print_topic_word_distribution
    dist = get_topic_word_distribution(lda, tf_feature_names,n_topics)
TypeError: get_topic_word_distribution() takes 2 positional arguments but 3 were given
```
时间: 2024-01-24 21:16:51 浏览: 38
错误提示表明 get_topic_word_distribution() 函数只需要两个位置参数,但是在 print_topic_word_distribution() 函数中却传递了三个位置参数。检查一下代码,发现 print_topic_word_distribution() 函数中还传递了一个 n_topics 参数,它应该被移除。修改代码如下:
```
def print_topic_word_distribution(lda, tf_feature_names, n_top_words):
    """Print the most probable words of every topic with their probabilities.

    The topic-word distribution is taken from ``lda.components_`` and each
    row is normalised to sum to 1.  The previous version used the output of
    ``lda.transform`` instead, which is a document-topic vector: its entries
    are scalars, so iterating over ``dist[i]`` raised ``TypeError``.

    Args:
        lda: Fitted LDA model exposing ``components_`` as a 2-D array of
            shape (number of topics, vocabulary size).
        tf_feature_names: Vocabulary words, indexable by the column index
            of ``lda.components_``.
        n_top_words: How many of the highest-probability words to print per
            topic.  Values larger than the vocabulary are clamped.

    Returns:
        None.  One line per topic is written to standard output, formatted
        as ``Topic <i>: word(0.1234), word(0.0567), ...``.
    """
    weights = lda.components_
    # Row-normalise so every topic becomes a probability distribution.
    probs = weights / weights.sum(axis=1, keepdims=True)
    # Clamp so an oversized (or negative) request cannot mis-slice the ranking.
    top_n = max(0, min(n_top_words, probs.shape[1]))
    for topic_idx, row in enumerate(probs):
        # argsort is ascending; reverse it to get the most probable words first.
        ranked = row.argsort()[::-1][:top_n]
        print("Topic {}: {}".format(
            topic_idx,
            ', '.join(
                "{}({:.4f})".format(tf_feature_names[j], row[j]) for j in ranked
            ),
        ))
def output_topic_word_distribution_to_excel(lda, tf_feature_names, n_top_words, filename):
    """Write per-topic word probabilities to an Excel workbook.

    Sheet layout (unchanged from the previous version): row 1 is a header
    holding "Topic" followed by the first ``n_top_words`` vocabulary words;
    each following row holds a topic index and that topic's probability for
    each header word.  Columns follow vocabulary order, so they are not
    ranked by probability.

    The probabilities come from ``lda.components_`` with each row normalised
    to sum to 1.  The previous version used the output of ``lda.transform``,
    a document-topic vector, so ``dist[i][j]`` indexed a scalar and failed.

    Args:
        lda: Fitted LDA model exposing ``components_`` as a 2-D array of
            shape (number of topics, vocabulary size).
        tf_feature_names: Vocabulary words, indexable by the column index
            of ``lda.components_``.
        n_top_words: Number of word columns to write.  Clamped to the
            vocabulary size so the header loop cannot index out of range.
        filename: Path the workbook is saved to.

    Returns:
        None.  The workbook is written to ``filename``.
    """
    weights = lda.components_
    # Row-normalise so every topic becomes a probability distribution.
    probs = weights / weights.sum(axis=1, keepdims=True)
    n_cols = max(0, min(n_top_words, len(tf_feature_names), probs.shape[1]))

    # Create the workbook and its single worksheet.
    wb = Workbook()
    ws = wb.active
    ws.title = "Topic Word Distribution"

    # Header row: "Topic" followed by the word for each column.
    ws.cell(row=1, column=1).value = "Topic"
    for j in range(n_cols):
        ws.cell(row=1, column=j + 2).value = tf_feature_names[j]

    # One row per topic, offset by one for the header row.
    for i, row in enumerate(probs):
        ws.cell(row=i + 2, column=1).value = i
        for j in range(n_cols):
            # Store a plain Python float rather than a NumPy scalar.
            ws.cell(row=i + 2, column=j + 2).value = float(row[j])

    # Save the Excel file.
    wb.save(filename)
```
注意,去掉 n_topics 参数后,主题数可以通过 lda.n_components 属性获取。另外,lda.transform() 返回的数组形状为 (样本数, 主题数),主题数对应的是第二维的长度,而不是第一维。
相关推荐
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)