Python英文词频统计代码
时间: 2024-05-27 09:06:31 浏览: 150
以下给出两种用 Python 统计英文文章词频的实现,均按词频从高到低输出出现次数最多的前十个单词:
引用(手动用字典统计词频并排序):
```python
def words_list(path=r'file_path', encoding='utf-8'):
    """Read a text file and return its whitespace-separated words.

    Args:
        path: Path of the article to read. The default is the placeholder
            ``'file_path'``; replace it or pass the real path.
        encoding: Text encoding used to decode the file. Given explicitly so
            the result does not depend on the platform's locale default.

    Returns:
        A list of the tokens produced by ``str.split()`` on the whole file,
        in order of appearance. Punctuation and letter case are left as-is.
    """
    with open(path, 'r', encoding=encoding) as f:
        words = f.read().split()
    return words
def word_dic(words):
    """Count how many times each word occurs.

    Args:
        words: An iterable of hashable items (typically strings).

    Returns:
        A dict mapping each distinct word to its number of occurrences,
        with keys in order of first appearance.
    """
    word_dict = {}
    for word in words:
        # A default of 0 handles first-seen and already-seen words alike.
        word_dict[word] = word_dict.get(word, 0) + 1
    return word_dict
def word_fre(word_dict):
    """Group words by their occurrence count.

    Args:
        word_dict: A dict mapping word -> count.

    Returns:
        A dict mapping each count to the list of words that have that count,
        each list in the iteration order of ``word_dict``.
    """
    fre_dict = {}
    for key, value in word_dict.items():
        # setdefault creates the bucket for a count the first time it is seen.
        fre_dict.setdefault(value, []).append(key)
    return fre_dict
def word_sort(fre_dict):
    """Flatten a count -> words mapping into a list sorted by count, descending.

    Args:
        fre_dict: A dict mapping an occurrence count to the list of words
            that occur that many times.

    Returns:
        A list of ``"word:count"`` strings, highest count first. Words that
        share a count keep the order they have in their bucket.
    """
    word_sort_list = []
    for fre in sorted(fre_dict, reverse=True):
        # Treat every bucket the same way: the former single-word special case
        # concatenated the list itself with a string and raised TypeError.
        for word in fre_dict[fre]:
            word_sort_list.append(f"{word}:{fre}")
    return word_sort_list
# word_sort returns entries ordered from most to least frequent, so the ten
# most frequent words are the first ten items (a [-10:] slice would give the
# ten least frequent ones).
print(word_sort(word_fre(word_dic(words_list())))[:10])  # print the ten most frequent words
```
引用(使用 collections.Counter 统计词频):
```python
import collections
def words_list(path=r'file_path', encoding='utf-8'):
    """Read a text file and return its whitespace-separated words.

    Args:
        path: Path of the article to read. The default is the placeholder
            ``'file_path'``; replace it or pass the real path.
        encoding: Text encoding used to decode the file. Given explicitly so
            the result does not depend on the platform's locale default.

    Returns:
        A list of the tokens produced by ``str.split()`` on the whole file,
        in order of appearance. Punctuation and letter case are left as-is.
    """
    with open(path, 'r', encoding=encoding) as f:
        words = f.read().split()
    return words
# Count every whitespace-separated word from the article, then print the ten
# most common ones, one per line, as "word:count".
words = words_list()
w = collections.Counter(words)
for word, cnt in w.most_common(10):
    print(f"{word}:{cnt}")
```
阅读全文