import jieba import wordcloud def takeSecond(elem): return elem[1] def createWordCloud(text): w=wordcloud.WordCloud ('font_path'=="msyh.ttf",'width'==1000,'height'==500,'background_color'=="white").generate(text) w.to_file("西游记词云图.jpg") def main(): path = "西游记.txt" file = open(path,"r",encoding="utf-8") text=file.read() file.close() words = jieba.lcut(text) counts = {} for word in words: if len(word) == 1: continue elif word == "大圣" or word=="老孙" or word=="行者" or word=="孙大圣" or word=="孙行者" or word=="猴王" or word=="悟空" or word=="齐天大圣" or word=="猴子": rword = "孙悟空" elif word == "师父" or word == "三藏" or word=="圣僧": rword = "唐僧" elif word == "呆子" or word=="八戒" or word=="老猪": rword = "猪八戒" elif word=="沙和尚": rword="沙僧" elif word == "妖精" or word=="妖魔" or word=="妖道": rword = "妖怪" elif word=="佛祖": rword="如来" elif word=="三太子": rword="白马" else: rword = word counts[rword] = counts.get(rword,0) + 1 file = open("excludes.txt","r",encoding="utf-8") excludes =file.read().split(",") file.close for delWord in excludes: try: del counts[delWord] except: continue items = list(counts.items()) items.sort(key = takeSecond,reverse=True) for i in range(20): item=items[i] keyWord =item[0] count=item[1] print("{0:<10}{1:>5}".format(keyWord,count)) createWordCloud(str(items[0:20])) main()
时间: 2024-04-07 17:27:59 浏览: 127
在代码中,创建词云对象时,参数的赋值方式不正确。应该使用等号(=)而不是双等号(==)。请修改代码中创建词云对象的部分,将`'font_path'=="msyh.ttf",'width'==1000,'height'==500,'background_color'=="white"`改为`font_path="msyh.ttf",width=1000,height=500,background_color="white"`。
相关问题
import jieba import wordcloud def takeSecond(elem): return elem[1] def createWordCloud(text): w=wordcloud.WordCloud (font_path="msyh.ttf",width=1000,height=500,background_color="white").generate(text) w.to_file("西游记词云图.jpg") def main(): path = "西游记.txt" file = open(path,"r",encoding="utf-8") text=file.read() file.close() words = jieba.lcut(text) counts = {} for word in words: if len(word) == 1: continue elif word == "大圣" or word=="老孙" or word=="行者" or word=="孙大圣" or word=="孙行者" or word=="猴王" or word=="悟空" or word=="齐天大圣" or word=="猴子": rword = "孙悟空" elif word == "师父" or word == "三藏" or word=="圣僧": rword = "唐僧" elif word == "呆子" or word=="八戒" or word=="老猪": rword = "猪八戒" elif word=="沙和尚": rword="沙僧" elif word == "妖精" or word=="妖魔" or word=="妖道": rword = "妖怪" elif word=="佛祖": rword="如来" elif word=="三太子": rword="白马" else: rword = word counts[rword] = counts.get(rword,0) + 1 file = open("excludes.txt","r",encoding="utf-8") excludes =file.read().split(",") file.close for delWord in excludes: try: del counts[delWord] except: continue items = list(counts.items()) items.sort(key = takeSecond,reverse=True) for i in range(20): item=items[i] keyWord =item[0] count=item[1] print("{0:<10}{1:>5}".format(keyWord,count)) createWordCloud(str(items[0:20])) main()
在代码中有以下几个错误:
1. 第一行代码应该在同一行,不能出现换。
2. 第二行代码需要加上空格。
3. 第四行代码需要加上空格。
4. 第18行代码需要加上空格。
5. 第27行代码`file.close`后需要加上括号。
6. 第37行代码`file.close`后需要加上括号。
7. 第51行代码的`range(20)`应该改为`range(len(items))`。
8. 第54行代码需要加上括号。
这是修改后的代码:
```
import jieba
import wordcloud
def takeSecond(elem):
return elem[1]
def createWordCloud(text):
w=wordcloud.WordCloud(font_path="msyh.ttf", width=1000, height=500, background_color="white").generate(text)
w.to_file("西游记词云图.jpg")
def main():
path = "西游记.txt"
file = open(path, "r", encoding="utf-8")
text = file.read()
file.close()
words = jieba.lcut(text)
counts = {}
for word in words:
if len(word) == 1:
continue
elif word == "大圣" or word == "老孙" or word == "行者" or word == "孙大圣" or word == "孙行者" or word == "猴王" or word == "悟空" or word == "齐天大圣" or word == "猴子":
rword = "孙悟空"
elif word == "师父" or word == "三藏" or word == "圣僧":
rword = "唐僧"
elif word == "呆子" or word == "八戒" or word == "老猪":
rword = "猪八戒"
elif word == "沙和尚":
rword = "沙僧"
elif word == "妖精" or word == "妖魔" or word == "妖道":
rword = "妖怪"
elif word == "佛祖":
rword = "如来"
elif word == "三太子":
rword = "白马"
else:
rword = word
counts[rword] = counts.get(rword, 0) + 1
file = open("excludes.txt", "r", encoding="utf-8")
excludes = file.read().split(",")
file.close()
for delWord in excludes:
try:
del counts[delWord]
except:
continue
items = list(counts.items())
items.sort(key=takeSecond, reverse=True)
for i in range(len(items)):
item = items[i]
keyWord = item[0]
count = item[1]
print("{0:<10}{1:>5}".format(keyWord, count))
createWordCloud(str(items[0:20]))
main()
```
用python完成实践(中文词频统计):对中文词频统计项目(import jieba import wordcloud class ChineseWordCounter: def __init__(self): self.content = '' self.words = [] self.word_frequencies = {} def readfile_demo_with(self, filename): with open(filename, encoding='utf-8') as fp: self.content = fp.read() def seperate_words(self): ignore_word = ('的', '与', '个', '和') for word in jieba.cut(self.content, cut_all=False): if (len(word) > 1) and (word not in ignore_word): self.words.append(word) def count_words(self): for word in self.words: if word in self.word_frequencies.keys(): self.word_frequencies[word] += 1 else: self.word_frequencies[word] = 1 def draw_wordcloud(self, img_file): font = 'c:/windows/font/方正粗黑宋简体.ttf' cloudengine = wordcloud.WordCloud(font_path=font) cloudengine.generate(" ".join(self.words)) cloudengine.to_file(img_file))进行修改,把统计出的词频保存到指定的csv文件中。
你可以按照以下方式修改代码,将统计出的词频保存到指定的CSV文件中:
```python
import csv
import jieba
import wordcloud
class ChineseWordCounter:
def __init__(self):
self.content = ''
self.words = []
self.word_frequencies = {}
def readfile_demo_with(self, filename):
with open(filename, encoding='utf-8') as fp:
self.content = fp.read()
def separate_words(self):
ignore_word = ('的', '与', '个', '和')
for word in jieba.cut(self.content, cut_all=False):
if (len(word) > 1) and (word not in ignore_word):
self.words.append(word)
def count_words(self):
for word in self.words:
if word in self.word_frequencies.keys():
self.word_frequencies[word] += 1
else:
self.word_frequencies[word] = 1
def save_word_frequencies_to_csv(self, csv_file):
with open(csv_file, mode='w', encoding='utf-8', newline='') as file:
writer = csv.writer(file)
writer.writerow(['Word', 'Frequency'])
for word, freq in self.word_frequencies.items():
writer.writerow([word, freq])
def draw_wordcloud(self, img_file):
font = 'c:/windows/font/方正粗黑宋简体.ttf'
cloudengine = wordcloud.WordCloud(font_path=font)
cloudengine.generate(" ".join(self.words))
cloudengine.to_file(img_file)
```
你可以调用 `save_word_frequencies_to_csv` 方法,将词频保存到指定的 CSV 文件中。例如,假设你的实例名为 `counter`,你可以通过 `counter.save_word_frequencies_to_csv('word_frequencies.csv')` 来保存词频到 `word_frequencies.csv` 文件中。