数据结构哈夫曼树课设
时间: 2023-11-26 21:48:17 浏览: 65
为了回答这个问题,我将介绍哈夫曼树的基本概念和构建过程,以及如何使用哈夫曼树进行编码。
哈夫曼树是一种特殊的二叉树,它的叶子节点对应于要编码的字符,而非叶子节点对应于权值。哈夫曼树的构建过程是这样的:
1. 统计文件中每个字符出现的频度。
2. 将每个字符及其频度作为一个节点,构建一个森林。
3. 从森林中选出两个权值最小的节点,将它们合并成一个新节点,权值为两个节点的权值之和,新节点作为一棵树的根节点,原来的两个节点作为新节点的左右子节点。
4. 将新节点插入到森林中,重复步骤3,直到森林中只剩下一棵树,这棵树就是哈夫曼树。
哈夫曼树构建完成后,就可以使用它来进行编码了。编码的过程是这样的:
1. 从哈夫曼树的根节点开始,如果当前节点是叶子节点,则输出该节点对应的字符的编码。
2. 如果当前节点不是叶子节点,则根据要编码的字符在哈夫曼树中的位置,向左或向右遍历哈夫曼树,直到找到叶子节点为止。
下面是一个Python实现的例子,假设要编码的文件名为input.txt,编码后的文件名为output.txt:
```python
import heapq
import os
class HuffmanNode:
def __init__(self, char, freq):
self.char = char
self.freq = freq
self.left = None
self.right = None
def __lt__(self, other):
return self.freq < other.freq
class HuffmanTree:
def __init__(self, text):
self.text = text
self.freq = {}
self.heap = []
self.codes = {}
self.reverse_codes = {}
def count_freq(self):
for char in self.text:
if char in self.freq:
self.freq[char] += 1
else:
self.freq[char] = 1
def make_heap(self):
for char in self.freq:
node = HuffmanNode(char, self.freq[char])
heapq.heappush(self.heap, node)
def merge_nodes(self):
while len(self.heap) > 1:
node1 = heapq.heappop(self.heap)
node2 = heapq.heappop(self.heap)
merged = HuffmanNode(None, node1.freq + node2.freq)
merged.left = node1
merged.right = node2
heapq.heappush(self.heap, merged)
def make_codes_helper(self, root, current_code):
if root is None:
return
if root.char is not None:
self.codes[root.char] = current_code
self.reverse_codes[current_code] = root.char
return
self.make_codes_helper(root.left, current_code + "0")
self.make_codes_helper(root.right, current_code + "1")
def make_codes(self):
root = heapq.heappop(self.heap)
current_code = ""
self.make_codes_helper(root, current_code)
def get_encoded_text(self):
encoded_text = ""
for char in self.text:
encoded_text += self.codes[char]
return encoded_text
def pad_encoded_text(self, encoded_text):
extra_padding = 8 - len(encoded_text) % 8
for i in range(extra_padding):
encoded_text += "0"
padded_info = "{0:08b}".format(extra_padding)
padded_encoded_text = padded_info + encoded_text
return padded_encoded_text
def get_byte_array(self, padded_encoded_text):
if len(padded_encoded_text) % 8 != 0:
print("Encoded text not padded properly")
exit(0)
b = bytearray()
for i in range(0, len(padded_encoded_text), 8):
byte = padded_encoded_text[i:i+8]
b.append(int(byte, 2))
return b
def compress(self):
self.count_freq()
self.make_heap()
self.merge_nodes()
self.make_codes()
encoded_text = self.get_encoded_text()
padded_encoded_text = self.pad_encoded_text(encoded_text)
b = self.get_byte_array(padded_encoded_text)
with open("output.txt", "wb") as output:
output.write(bytes(b))
print("Compressed")
return "output.txt"
def remove_padding(self, padded_encoded_text):
padded_info = padded_encoded_text[:8]
extra_padding = int(padded_info, 2)
padded_encoded_text = padded_encoded_text[8:]
encoded_text = padded_encoded_text[:-1*extra_padding]
return encoded_text
def decode_text(self, encoded_text):
current_code = ""
decoded_text = ""
for bit in encoded_text:
current_code += bit
if current_code in self.reverse_codes:
char = self.reverse_codes[current_code]
decoded_text += char
current_code = ""
return decoded_text
def decompress(self, input_path):
with open(input_path, "rb") as file:
bit_string = ""
byte = file.read(1)
while byte:
byte = ord(byte)
bits = bin(byte)[2:].rjust(8, '0')
bit_string += bits
byte = file.read(1)
encoded_text = self.remove_padding(bit_string)
decompressed_text = self.decode_text(encoded_text)
with open("decompressed.txt", "w") as output:
output.write(decompressed_text)
print("Decompressed")
return "decompressed.txt"
text = open("input.txt", "r").read()
tree = HuffmanTree(text)
compressed_file_path = tree.compress()
decompressed_file_path = tree.decompress(compressed_file_path)
```