文件压缩程序的设计与实现代码
时间: 2023-12-29 17:51:51 浏览: 70
以下是一个简单的文件压缩程序的设计与实现代码，采用哈夫曼编码进行压缩：
```python
import heapq
import os
class HuffmanNode:
    """A node of a Huffman tree that compares by frequency.

    A freshly created node has no children; ``left`` and ``right`` are
    attached afterwards by whoever assembles the tree.
    """

    def __init__(self, value=None, freq=0):
        """Create a childless node.

        Args:
            value: The symbol stored in the node (``None`` by default).
            freq: The weight used to order nodes against each other.
        """
        self.left = None
        self.right = None
        self.value = value
        self.freq = freq

    def __lt__(self, other):
        """Return True when this node is lighter than *other*.

        Defining ``<`` is what lets nodes be stored directly in a ``heapq``
        priority queue.
        """
        return self.freq < other.freq

    def __repr__(self):
        """Return a short debugging representation of the node."""
        return f'{type(self).__name__}(value={self.value!r}, freq={self.freq!r})'
class HuffmanTree:
    """Huffman tree built from the symbol frequencies of a sequence.

    Typical use: construct with the data, call ``build_tree()``, then
    ``get_codes()`` to obtain the symbol -> bit-string table.
    """

    def __init__(self, data):
        """Remember *data*; no work is done until ``build_tree()`` is called.

        Args:
            data: An iterable of hashable symbols (for example a ``bytes``
                object, which yields ints).
        """
        self.root = None
        self.freq = {}
        self.data = data

    def build_tree(self):
        """Count symbol frequencies and assemble the tree into ``self.root``.

        ``self.root`` is left as ``None`` when the data is empty.
        """
        # Count into a fresh dict so that calling build_tree() a second time
        # does not double the frequencies.
        counts = {}
        for symbol in self.data:
            counts[symbol] = counts.get(symbol, 0) + 1
        self.freq = counts

        heap = []
        for symbol, count in counts.items():
            heapq.heappush(heap, HuffmanNode(symbol, count))

        # Repeatedly merge the two lightest nodes until one root remains.
        while len(heap) > 1:
            left = heapq.heappop(heap)
            right = heapq.heappop(heap)
            parent = HuffmanNode(None, left.freq + right.freq)
            parent.left = left
            parent.right = right
            heapq.heappush(heap, parent)

        # Empty data leaves the heap empty; there is nothing to pop.
        self.root = heapq.heappop(heap) if heap else None

    def get_codes(self):
        """Return a dict mapping each symbol to its bit string.

        Returns:
            ``{}`` when no tree has been built (or the data was empty);
            ``{symbol: '0'}`` when the tree is a single leaf, since an empty
            code could not be decoded; otherwise the table obtained by
            walking the tree ('0' for left, '1' for right).
        """
        codes = {}
        root = self.root
        if root is None:
            return codes
        if root.left is None and root.right is None:
            # One distinct symbol: give it a one-bit code instead of ''.
            codes[root.value] = '0'
            return codes
        self._get_codes(root, '', codes)
        return codes

    def _get_codes(self, node, code, codes):
        """Recursively record the code of every leaf below *node*.

        Args:
            node: The subtree to walk.
            code: The bit string accumulated on the path from the root.
            codes: The dict that receives ``symbol -> code`` entries.
        """
        # Detect leaves by the absence of children, not by the truthiness of
        # the symbol: the byte value 0 is a perfectly valid symbol.
        if node.left is None and node.right is None:
            codes[node.value] = code
            return
        self._get_codes(node.left, code + '0', codes)
        self._get_codes(node.right, code + '1', codes)
class Compressor:
    """Huffman-compress a file on disk and restore it again.

    The compressed file ``<filename>.compressed`` holds one header byte with
    the number of padding bits, followed by the packed code bits. The code
    table itself is NOT written to the file; it lives only in ``self.codes``,
    so ``decompress()`` must be called on the same instance after
    ``compress()``.
    """

    def __init__(self, filename):
        """Record *filename* and its current size.

        Args:
            filename: Path of the file to compress; it must already exist
                because its size is read immediately.
        """
        self.filename = filename
        self.file_size = os.path.getsize(filename)
        self.data = None
        self.codes = None
        self.encoded_data = None
        self.padding = None

    def compress(self):
        """Encode the file and write ``<filename>.compressed``."""
        with open(self.filename, 'rb') as f:
            self.data = f.read()

        if self.data:
            ht = HuffmanTree(self.data)
            ht.build_tree()
            self.codes = ht.get_codes()
            if len(self.codes) == 1:
                # A one-symbol alphabet must still emit one bit per symbol,
                # otherwise the symbol count is lost.
                self.codes = {
                    symbol: code or '0' for symbol, code in self.codes.items()
                }
        else:
            # Nothing to encode; an empty table keeps decompress() usable.
            self.codes = {}

        encoded_bits = ''.join(self.codes[d] for d in self.data)
        # 0..7 filler bits; no extra byte when the stream is already aligned.
        self.padding = -len(encoded_bits) % 8
        encoded_bits += '0' * self.padding
        self.encoded_data = bytearray(
            int(encoded_bits[i:i + 8], 2)
            for i in range(0, len(encoded_bits), 8)
        )

        with open(self.filename + '.compressed', 'wb') as f:
            f.write(bytes([self.padding]))
            f.write(self.encoded_data)
        print(f'Compressed {self.filename} from {self.file_size} bytes to {len(self.encoded_data)} bytes.')

    def decompress(self):
        """Decode ``<filename>.compressed`` into ``<filename>.decompressed``.

        Raises:
            ValueError: If no code table is available (``compress()`` has not
                run on this instance) or the bit stream ends in the middle of
                a code.
        """
        if self.codes is None:
            raise ValueError(
                'no code table available: call compress() on this instance first'
            )

        with open(self.filename + '.compressed', 'rb') as f:
            self.padding = int.from_bytes(f.read(1), byteorder='big')
            self.encoded_data = f.read()

        encoded_bits = ''.join(f'{b:08b}' for b in self.encoded_data)
        # Guard against padding == 0, where [:-0] would drop everything.
        # A padding of 8 (written by older versions) is stripped the same way.
        if self.padding:
            encoded_bits = encoded_bits[:-self.padding]

        symbols = {code: symbol for symbol, code in self.codes.items()}
        # Symbols are byte values (ints), so collect them in a bytearray
        # rather than a str.
        decoded_data = bytearray()
        current = ''
        for bit in encoded_bits:
            current += bit
            if current in symbols:
                decoded_data.append(symbols[current])
                current = ''
        if current:
            raise ValueError('compressed data ends in the middle of a code')

        with open(self.filename + '.decompressed', 'wb') as f:
            f.write(decoded_data)
        print(f'Decompressed {self.filename} to {len(decoded_data)} bytes.')
```
使用示例:
```python
# Round-trip demo: compress test.txt, then restore it from the same object
# (decompress() reuses the code table that compress() left on the instance).
compressor = Compressor('test.txt')
compressor.compress()
compressor.decompress()
```
其中，`test.txt` 是需要压缩的文件名（该文件必须已经存在）。程序将会生成 `test.txt.compressed` 作为压缩后的文件，以及 `test.txt.decompressed` 作为解压缩后的文件。注意：编码表只保存在内存中，并没有写入压缩文件，因此 `decompress()` 必须在同一个 `Compressor` 对象上、并且在 `compress()` 之后调用。
阅读全文