一篇英文文章存储在一个文本文件中,然后分别基于线性表、二叉排序树和哈希表不同的存储结构,完成单词词频的统计和单词的检索功能。同时计算不同检索策略下的平均查找长度ASL代码
时间: 2024-01-21 20:19:33 浏览: 29
以下是基于线性表的单词词频统计和单词检索功能的代码:
```python
class Word:
def __init__(self, word):
self.word = word
self.freq = 1
class LinearTable:
def __init__(self):
self.table = []
def add_word(self, word):
for w in self.table:
if w.word == word:
w.freq += 1
return
self.table.append(Word(word))
def search_word(self, word):
for w in self.table:
if w.word == word:
return w.freq
return 0
file_name = "example.txt"
with open(file_name, "r") as f:
lt = LinearTable()
for line in f:
words = line.strip().split(" ")
for word in words:
lt.add_word(word)
freq = lt.search_word("example")
print("The frequency of 'example' is:", freq)
```
以下是基于二叉排序树的单词词频统计和单词检索功能的代码:
```python
class TreeNode:
def __init__(self, word):
self.word = word
self.freq = 1
self.left = None
self.right = None
class BST:
def __init__(self):
self.root = None
def add_word(self, word):
if self.root is None:
self.root = TreeNode(word)
return
cur = self.root
while cur:
if word == cur.word:
cur.freq += 1
return
elif word < cur.word:
if cur.left:
cur = cur.left
else:
cur.left = TreeNode(word)
return
else:
if cur.right:
cur = cur.right
else:
cur.right = TreeNode(word)
return
def search_word(self, word):
cur = self.root
while cur:
if word == cur.word:
return cur.freq
elif word < cur.word:
cur = cur.left
else:
cur = cur.right
return 0
file_name = "example.txt"
with open(file_name, "r") as f:
bst = BST()
for line in f:
words = line.strip().split(" ")
for word in words:
bst.add_word(word)
freq = bst.search_word("example")
print("The frequency of 'example' is:", freq)
```
以下是基于哈希表的单词词频统计和单词检索功能的代码:
```python
class HashTable:
def __init__(self):
self.table = {}
def add_word(self, word):
if word in self.table:
self.table[word] += 1
else:
self.table[word] = 1
def search_word(self, word):
if word in self.table:
return self.table[word]
else:
return 0
file_name = "example.txt"
with open(file_name, "r") as f:
ht = HashTable()
for line in f:
words = line.strip().split(" ")
for word in words:
ht.add_word(word)
freq = ht.search_word("example")
print("The frequency of 'example' is:", freq)
```
计算不同检索策略下的平均查找长度ASL,需要在每种数据结构的搜索函数中添加计算ASL的代码。例如,在线性表的搜索函数中,可以添加一个变量来记录查找过程中遍历的节点数,然后在函数返回之前计算ASL。
```python
class LinearTable:
def __init__(self):
self.table = []
def add_word(self, word):
for w in self.table:
if w.word == word:
w.freq += 1
return
self.table.append(Word(word))
def search_word(self, word):
asl = 0
for i, w in enumerate(self.table):
asl += 1
if w.word == word:
return w.freq, asl
return 0, asl
file_name = "example.txt"
with open(file_name, "r") as f:
lt = LinearTable()
for line in f:
words = line.strip().split(" ")
for word in words:
lt.add_word(word)
freq, asl = lt.search_word("example")
print("The frequency of 'example' is:", freq)
print("The ASL of the search is:", asl / len(lt.table))
```
在二叉排序树和哈希表中计算ASL也类似,只需要在搜索函数中添加遍历节点的计数器,并在函数返回之前计算ASL即可。