编程实现Shannon编码、Huffman编码。 输入:信源概率分布。 输出:Shannon码、Huffman码,平均码长,编码效率。
时间: 2024-02-06 12:09:05 浏览: 60
以下是Python的实现:
```python
import heapq
from math import log2
class Node:
def __init__(self, symbol=None, prob=0, left=None, right=None):
self.symbol = symbol
self.prob = prob
self.left = left
self.right = right
def __lt__(self, other):
return self.prob < other.prob
def shannon_fano(p, symbols):
def shannon_fano_rec(low, high):
if low + 1 == high:
return
mid = low + 1
while mid < high and abs(prob_sum[mid - 1] - prob_sum[high - 1] + prob_sum[low - 1]) <= abs(
prob_sum[mid] - prob_sum[high - 1] + prob_sum[low - 1]):
mid += 1
for i in range(low, mid):
codes[i] += '0'
for i in range(mid, high):
codes[i] += '1'
shannon_fano_rec(low, mid)
shannon_fano_rec(mid, high)
n = len(p)
prob_sum = [0] * n
codes = [''] * n
for i in range(n):
prob_sum[i] = p[i] if i == 0 else prob_sum[i - 1] + p[i]
shannon_fano_rec(0, n)
code_dict = {symbols[i]: codes[i] for i in range(n)}
return code_dict
def huffman(p, symbols):
n = len(p)
q = [Node(symbols[i], p[i]) for i in range(n)]
heapq.heapify(q)
for i in range(n - 1):
left = heapq.heappop(q)
right = heapq.heappop(q)
new_node = Node(prob=left.prob + right.prob, left=left, right=right)
heapq.heappush(q, new_node)
code_dict = {}
def traverse(node, prefix=''):
if node.symbol is not None:
code_dict[node.symbol] = prefix
else:
traverse(node.left, prefix + '0')
traverse(node.right, prefix + '1')
root = heapq.heappop(q)
traverse(root)
return code_dict
def calc_avg_code_length(code_dict, p):
return sum([len(code_dict[symbol]) * p[i] for i, symbol in enumerate(code_dict.keys())])
def calc_efficiency(h, l):
return h / l * 100
if __name__ == '__main__':
p = [0.1, 0.15, 0.3, 0.16, 0.29]
symbols = ['A', 'B', 'C', 'D', 'E']
shannon_code = shannon_fano(p, symbols)
print('Shannon编码: ', shannon_code)
huffman_code = huffman(p, symbols)
print('Huffman编码: ', huffman_code)
h_shannon = calc_avg_code_length(shannon_code, p)
h_huffman = calc_avg_code_length(huffman_code, p)
print('平均码长 - Shannon: ', h_shannon)
print('平均码长 - Huffman: ', h_huffman)
entropy = sum([-p[i] * log2(p[i]) for i in range(len(p))])
print('熵: ', entropy)
eff_shannon = calc_efficiency(entropy, h_shannon)
eff_huffman = calc_efficiency(entropy, h_huffman)
print('编码效率 - Shannon: {:.2f}%'.format(eff_shannon))
print('编码效率 - Huffman: {:.2f}%'.format(eff_huffman))
```
输入为信源概率分布,其中p是每个符号出现的概率,symbols是每个符号的名称。输出为Shannon码、Huffman码、平均码长和编码效率。其中平均码长是根据概率分布和编码计算的,编码效率是熵与平均码长的比值。
阅读全文