4. 《红楼梦》人物统计:编写程序,结合 jieba 库,应用字符串及组合数据类型的相关方法实现文本词频统计,输出《红楼梦》中出场次数最高的10位人物,要求根据出场次数降序排列,并显示人物姓名与出场次数。
时间: 2024-05-19 20:14:08 浏览: 84
a.rar_人物名称频数统计_红楼梦
# 导入jieba库
import jieba
from collections import Counter

# Maps each spelling that may appear as a segmented token to the canonical
# character name it is counted under.  The previous version compared
# consecutive tokens against single characters (e.g. '贾' followed by '宝'),
# but jieba emits whole words, so those comparisons practically never matched.
# NOTE(review): the alias list is a hand-picked starting set; extend it as
# needed.  '老太太' is assumed to refer to 贾母 in the vast majority of cases.
NAME_ALIASES = {
    '贾宝玉': '贾宝玉', '宝玉': '贾宝玉', '宝二爷': '贾宝玉',
    '林黛玉': '林黛玉', '黛玉': '林黛玉', '林妹妹': '林黛玉',
    '薛宝钗': '薛宝钗', '宝钗': '薛宝钗', '宝姐姐': '薛宝钗',
    '王熙凤': '王熙凤', '熙凤': '王熙凤', '凤姐': '王熙凤', '凤姐儿': '王熙凤',
    '贾母': '贾母', '老太太': '贾母',
    '王夫人': '王夫人',
    '邢夫人': '邢夫人',
    '尤二姐': '尤二姐',
    '贾琏': '贾琏',
    '贾政': '贾政',
    '袭人': '袭人',
    '平儿': '平儿',
    '探春': '探春',
    '史湘云': '史湘云', '湘云': '史湘云',
    '李纨': '李纨',
    '晴雯': '晴雯',
    '紫鹃': '紫鹃',
    '鸳鸯': '鸳鸯',
}


def count_characters(words, aliases=None):
    """Count how often each known character is mentioned in *words*.

    Args:
        words: Iterable of segmented tokens (e.g. the list from jieba.lcut).
        aliases: Optional mapping of token spelling -> canonical name.
            Defaults to NAME_ALIASES.

    Returns:
        A Counter keyed by canonical name.  Tokens that are not in the
        alias mapping are ignored.
    """
    if aliases is None:
        aliases = NAME_ALIASES
    return Counter(aliases[word] for word in words if word in aliases)


def top_characters(words, n=10, aliases=None):
    """Return the *n* most mentioned characters as (name, count) pairs.

    The pairs are sorted by count in descending order; names with equal
    counts keep the order in which they were first seen in *words*.
    """
    counts = count_characters(words, aliases)
    ranked = sorted(counts.items(), key=lambda item: item[1], reverse=True)
    return ranked[:n]


def main(path='hongloumeng.txt'):
    """Read the novel from *path*, segment it with jieba and print the top 10."""
    # Read the full text of the novel.
    with open(path, 'r', encoding='utf-8') as f:
        content = f.read()
    # Register every alias with jieba so that names are kept as single tokens
    # instead of being split into fragments during segmentation.
    for alias in NAME_ALIASES:
        jieba.add_word(alias)
    # Segment the text into a list of words.
    words = jieba.lcut(content)
    # Print the ranking, numbered from 1.
    print('《红楼梦》中,出场次数最高的10位人物:')
    for rank, (name, count) in enumerate(top_characters(words, 10), start=1):
        print('第{}名:{},出场次数:{}'.format(rank, name, count))


if __name__ == '__main__':
    main()
阅读全文