"""Export the most frequent multi-character words of a text file to CSV.

The text is segmented once with jieba's part-of-speech tagger in paddle
mode. Each reported row holds the word, how often it occurred, and the
part-of-speech flag the tagger assigned to it most often.
"""
import csv
from collections import Counter, defaultdict

import jieba
import jieba.posseg as pseg
import paddle  # not referenced directly; presumably imported so a missing paddlepaddle fails early - confirm

TOP_N = 500  # maximum number of words to report

path = r'D:/明朝collection.txt'
with open(path, 'r', encoding='utf-8') as f:
    text = f.read()

jieba.enable_paddle()

counts = Counter()                     # word -> number of occurrences
flags_by_word = defaultdict(Counter)   # word -> how often each flag was assigned
for word, flag in pseg.cut(text, use_paddle=True):
    if len(word) == 1:
        continue  # tokens of length 1 are not counted
    counts[word] += 1
    # Remember the flag while it is still paired with its word; reading
    # `flag` after this loop would only give the last token's flag.
    flags_by_word[word][flag] += 1

# Write the results to a CSV file.
with open('D:/output.csv', 'w', encoding='utf-8-sig', newline='') as out_file:
    writer = csv.writer(out_file)
    writer.writerow(['Word', 'Count', 'Flag'])  # header row
    # most_common() returns at most TOP_N entries, so a text with fewer
    # distinct words no longer raises IndexError.
    for word, count in counts.most_common(TOP_N):
        flag = flags_by_word[word].most_common(1)[0][0]
        print("{0:<5}{1:>5}".format(word, count))
        print(flag)
        writer.writerow([word, count, flag])
时间: 2024-04-08 16:34:58 浏览: 166
jieba-0_jieba安装0.4.1_jieba-0.42.1.tar_jieba-0.42.1.tar.gz_jieba-
5星 · 资源好评率100%
您修改的代码已经添加了将词性输出到CSV文件的功能,并且将文件路径修改为了`D:/output.csv`。修改后的代码如下:
```python
"""Export the most frequent multi-character words of a text file to CSV.

The text is segmented once with jieba's part-of-speech tagger in paddle
mode. Each reported row holds the word, how often it occurred, and the
part-of-speech flag the tagger assigned to it most often.
"""
import csv
from collections import Counter, defaultdict

import jieba
import jieba.posseg as pseg
import paddle  # not referenced directly; presumably imported so a missing paddlepaddle fails early - confirm

TOP_N = 500  # maximum number of words to report

path = r'D:/明朝collection.txt'
with open(path, 'r', encoding='utf-8') as f:
    text = f.read()

jieba.enable_paddle()

counts = Counter()                     # word -> number of occurrences
flags_by_word = defaultdict(Counter)   # word -> how often each flag was assigned
for word, flag in pseg.cut(text, use_paddle=True):
    if len(word) == 1:
        continue  # tokens of length 1 are not counted
    counts[word] += 1
    # Record the flag assigned in context instead of re-segmenting the
    # isolated word later, which may split it and yield a different flag.
    flags_by_word[word][flag] += 1

# Write the results to a CSV file.
with open('D:/output.csv', 'w', encoding='utf-8-sig', newline='') as out_file:
    writer = csv.writer(out_file)
    writer.writerow(['Word', 'Count', 'Flag'])  # header row
    # most_common() returns at most TOP_N entries, so a text with fewer
    # distinct words no longer raises IndexError.
    for word, count in counts.most_common(TOP_N):
        flag = flags_by_word[word].most_common(1)[0][0]
        print("{0:<5}{1:>5}".format(word, count))
        print(flag)
        writer.writerow([word, count, flag])
```
这样修改后,代码会把词语、词频以及词性信息写入`D:/output.csv`文件,并在控制台中打印出每个词的词性信息。
阅读全文