import jieba f1= open("sy9//宋词.txt", "r") #代码开始 #代码结束 for i in range(15): print("{}{}".format(items[i][0],items[i][1]))
时间: 2024-02-22 12:58:56 浏览: 33
这段代码缺少一些必要的部分,无法直接运行。不过根据代码可知,使用了jieba分词库对文件进行了分词,并将词语频率统计结果存储在了items中,然后输出词频最高的前15个词语及其频率。如果需要完整的代码,可以参考下面的示例:
```python
import jieba
from collections import Counter
# Read the whole corpus; the path is relative to the current working directory.
with open('sy9/宋词.txt', 'r', encoding='utf-8') as f:
    content = f.read()

# Segment the text and count every token. jieba.cut returns a generator,
# which Counter consumes directly.
word_freq = Counter(jieba.cut(content))

# Drop punctuation and line breaks so they do not dominate the ranking.
# Both half-width and full-width forms are listed, because Chinese text
# normally uses the full-width ones.
stopwords = {',', '。', '?', '!', '、', ':', ';', '\n',
             ',', '?', '!', ':', ';'}
for w in stopwords:
    # Counter.__delitem__ ignores missing keys, so this never raises KeyError.
    del word_freq[w]

# Print the 15 most frequent words followed by their counts. Iterating over
# the result (instead of indexing 0..14) avoids an IndexError when the text
# contains fewer than 15 distinct tokens.
items = word_freq.most_common(15)
for word, count in items:
    print("{}{}".format(word, count))
```
注:需要将代码中的文件路径改为实际的路径;示例使用的是相对路径 `sy9/宋词.txt`,因此需确保 sy9 目录位于运行代码时的当前工作目录下。此外,示例中的 stopwords 只过滤了标点和换行符,为了获得更好的词频统计效果,建议自行准备一个更完整的停用词表,以删除常见的无意义词语。
相关问题
import jieba import wordcloud import imageio mask = imageio.imread('C:/Users/Febird/Desktop/1.png') #设定一个词云背景 with open('C:/Users/Febird/Desktop/傲世.txt', encoding='utf-8') as f: t = f.read() # 打开需要制作词云图的文件 ls = jieba.lcut(t) txt = " ".join(ls) # 将文章中的词组提出来 stopwords_file = open('C:/Users/Febird/Desktop/stopWord.txt', 'r', encoding='utf-8') stopwords = [(words.strip()) for words in stopwords_file.readlines()] w = wordcloud.WordCloud(width=4000, height=4000, stopwords=stopwords, font_path="msyh.ttc", colormap='hot', background_color='pink', mask=mask) # 定义一个词云 w.generate(txt) #生成词云 w.to_file(r'C:/Users/Febird/Desktop/test_3.png') #将词组变量txt导入词云对象w中并保存 添加代码 完成排名前五十个词的词云
在代码中添加以下部分可以完成排名前五十个词的词云:
```
from collections import Counter
# Count word frequencies, skipping stop words and whitespace-only tokens.
# WordCloud ignores its `stopwords` option when drawing from a frequency
# table, so the filtering has to happen here, before ranking.
stopword_set = set(stopwords)
words_count = Counter(
    word for word in ls
    if word.strip() and word not in stopword_set
)
# Keep the 50 most frequent words together with their counts.
top50_words = words_count.most_common(50)
# Draw the cloud straight from the frequency table. Feeding the counts
# directly avoids building a huge repeated string and keeps WordCloud's
# collocation detection from inventing bigrams out of the repeated words.
w.generate_from_frequencies(dict(top50_words))
```
完整代码如下:
```
import jieba
import wordcloud
import imageio
from collections import Counter
# Image whose shape is used as the mask of the word cloud.
mask = imageio.imread('C:/Users/Febird/Desktop/1.png')

# Load the source text and segment it into a list of words.
with open('C:/Users/Febird/Desktop/傲世.txt', encoding='utf-8') as f:
    t = f.read()
ls = jieba.lcut(t)

# Load the stop words, one per line. The `with` block closes the file, and a
# set gives O(1) membership tests in the filter below.
with open('C:/Users/Febird/Desktop/stopWord.txt', 'r', encoding='utf-8') as stopwords_file:
    stopwords = {line.strip() for line in stopwords_file}

w = wordcloud.WordCloud(width=4000, height=4000,
                        stopwords=stopwords,
                        font_path="msyh.ttc",
                        colormap='hot',
                        background_color='pink',
                        mask=mask)

# Count word frequencies, skipping stop words and whitespace-only tokens.
# WordCloud ignores its `stopwords` option when drawing from a frequency
# table, so the filtering has to happen here, before ranking.
words_count = Counter(
    word for word in ls
    if word.strip() and word not in stopwords
)
# Keep the 50 most frequent words together with their counts.
top50_words = words_count.most_common(50)

# Draw the cloud straight from the frequency table and save it. Feeding the
# counts directly avoids building a huge repeated string and keeps WordCloud's
# collocation detection from inventing bigrams out of the repeated words.
w.generate_from_frequencies(dict(top50_words))
w.to_file(r'C:/Users/Febird/Desktop/test_3.png')
```
import jieba def word_extract(): # 读取文件 corpus = [] path = 'D:/自然语言处理/第2章/data/金庸-白马啸西风.txt' content = '' for line in open(path, 'r', encoding='gbk', errors='ignore'): line = line.strip() content += line corpus.append(content) # 加载停用词 stop_words = [] path = 'D:/自然语言处理/第4章/data/stopword.txt' for line in open(path, encoding='utf8'): line = line.strip() stop_words.append(line) # jieba分词 split_words = [] word_list = jieba.cut(corpus[0]) for word in word_list: if word not in stop_words: split_words.append(word)这段代码哪里有问题
这段代码的缩进有问题,导致程序运行时会报错。具体来说,函数体、for 循环体和 if 语句体都需要比上一级多缩进一个制表符或四个空格;另外,原函数没有返回分词结果,需要在函数末尾用 return 将其返回。修改后的代码如下所示:
```
import jieba
def word_extract():
    """Segment the novel with jieba and return its words minus stop words.

    Returns:
        A list of the tokens produced by ``jieba.cut``, in their original
        order, excluding every token that appears in the stop-word file.
    """
    # Read the novel, stripping each line and joining the lines without a
    # separator. Undecodable GBK bytes are skipped (errors='ignore').
    path = 'D:/自然语言处理/第2章/data/金庸-白马啸西风.txt'
    with open(path, 'r', encoding='gbk', errors='ignore') as f:
        content = ''.join(line.strip() for line in f)

    # Load the stop words, one per line. A set makes the membership test in
    # the filter below O(1) per token.
    path = 'D:/自然语言处理/第4章/data/stopword.txt'
    with open(path, encoding='utf8') as f:
        stop_words = {line.strip() for line in f}

    # Segment with jieba and keep only the tokens that are not stop words.
    return [word for word in jieba.cut(content) if word not in stop_words]
```
相关推荐
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![pdf](https://img-home.csdnimg.cn/images/20210720083512.png)
![-](https://csdnimg.cn/download_wenku/file_type_column_c1.png)
![-](https://csdnimg.cn/download_wenku/file_type_column_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)
![](https://csdnimg.cn/download_wenku/file_type_ask_c1.png)