哈弗曼编码压缩文件c语言实现
时间: 2023-04-02 21:01:42 浏览: 73
可以回答这个问题。哈弗曼编码是一种常用的数据压缩算法,可以通过统计字符出现频率,构建哈弗曼树,然后生成对应的编码表,将原始数据进行编码压缩。在 C 语言中,可以使用数组和结构体等数据结构来实现哈弗曼编码压缩文件。
相关问题
java实现lz77+哈弗曼压缩文件
以下是Java实现LZ77+哈夫曼压缩文件的代码示例:
```java
import java.io.*;
import java.util.*;
public class LZ77HuffmanCompressor {
private static final int WINDOW_SIZE = 2048;
private static final int LOOKAHEAD_BUFFER_SIZE = 64;
private static final int MAX_MATCH_LENGTH = 15;
private static final int MIN_MATCH_LENGTH = 3;
public static void compress(String inputFilePath, String outputFilePath) throws IOException {
// 读取输入文件
byte[] inputBytes = readBytesFromFile(inputFilePath);
// 初始化哈夫曼编码器
HuffmanEncoder huffmanEncoder = new HuffmanEncoder();
// 初始化输出流
BitOutputStream bitOutputStream = new BitOutputStream(new FileOutputStream(outputFilePath));
// 写入文件头
bitOutputStream.writeBits(0x4C5A3737, 32); // "LZ77"的ASCII码
bitOutputStream.writeBits(inputBytes.length, 32); // 原始文件长度
// 初始化滑动窗口和前瞻缓冲区
byte[] window = new byte[WINDOW_SIZE];
byte[] lookaheadBuffer = new byte[LOOKAHEAD_BUFFER_SIZE];
int windowStartIndex = 0;
int lookaheadBufferEndIndex = 0;
// LZ77压缩
while (lookaheadBufferEndIndex < inputBytes.length) {
// 查找最长匹配
int matchLength = 0;
int matchOffset = 0;
for (int i = Math.max(lookaheadBufferEndIndex - WINDOW_SIZE, 0); i < lookaheadBufferEndIndex; i++) {
int length = getMatchLength(inputBytes, i, lookaheadBuffer, lookaheadBufferEndIndex, MAX_MATCH_LENGTH);
if (length > matchLength) {
matchLength = length;
matchOffset = lookaheadBufferEndIndex - i;
}
}
// 写入匹配信息
if (matchLength >= MIN_MATCH_LENGTH) {
bitOutputStream.writeBits(1, 1); // 标记为匹配
bitOutputStream.writeBits(matchOffset - 1, 11); // 写入匹配偏移量
bitOutputStream.writeBits(matchLength - MIN_MATCH_LENGTH, 4); // 写入匹配长度
lookaheadBufferEndIndex += matchLength;
} else {
bitOutputStream.writeBits(0, 1); // 标记为字面量
huffmanEncoder.encode(inputBytes[lookaheadBufferEndIndex], bitOutputStream); // 写入字面量
lookaheadBufferEndIndex++;
}
// 更新滑动窗口和前瞻缓冲区
if (lookaheadBufferEndIndex - windowStartIndex > WINDOW_SIZE) {
windowStartIndex = lookaheadBufferEndIndex - WINDOW_SIZE;
}
System.arraycopy(inputBytes, windowStartIndex, window, 0, lookaheadBufferEndIndex - windowStartIndex);
System.arraycopy(inputBytes, lookaheadBufferEndIndex, lookaheadBuffer, 0, Math.min(LOOKAHEAD_BUFFER_SIZE, inputBytes.length - lookaheadBufferEndIndex));
windowStartIndex = Math.max(lookaheadBufferEndIndex - WINDOW_SIZE, 0);
}
// 写入文件尾
bitOutputStream.writeBits(1, 1); // 标记为结束
bitOutputStream.flush();
bitOutputStream.close();
}
public static void decompress(String inputFilePath, String outputFilePath) throws IOException {
// 读取输入文件
byte[] inputBytes = readBytesFromFile(inputFilePath);
// 初始化哈夫曼解码器
HuffmanDecoder huffmanDecoder = new HuffmanDecoder();
// 初始化输入流
BitInputStream bitInputStream = new BitInputStream(new FileInputStream(inputFilePath));
// 读取文件头
int magicNumber = bitInputStream.readBits(32);
if (magicNumber != 0x4C5A3737) {
throw new RuntimeException("Invalid file format");
}
int originalLength = bitInputStream.readBits(32);
// 初始化滑动窗口和前瞻缓冲区
byte[] window = new byte[WINDOW_SIZE];
byte[] lookaheadBuffer = new byte[LOOKAHEAD_BUFFER_SIZE];
int windowStartIndex = 0;
int lookaheadBufferEndIndex = 0;
// LZ77解压
ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
while (true) {
int flag = bitInputStream.readBits(1);
if (flag == 0) { // 字面量
byte b = huffmanDecoder.decode(bitInputStream);
byteArrayOutputStream.write(b);
window[lookaheadBufferEndIndex % WINDOW_SIZE] = b;
lookaheadBufferEndIndex++;
} else if (flag == 1) { // 匹配
int matchOffset = bitInputStream.readBits(11) + 1;
int matchLength = bitInputStream.readBits(4) + MIN_MATCH_LENGTH;
for (int i = 0; i < matchLength; i++) {
byte b = window[(lookaheadBufferEndIndex - matchOffset + i) % WINDOW_SIZE];
byteArrayOutputStream.write(b);
window[lookaheadBufferEndIndex % WINDOW_SIZE] = b;
lookaheadBufferEndIndex++;
}
} else { // 结束
break;
}
// 更新滑动窗口和前瞻缓冲区
if (lookaheadBufferEndIndex - windowStartIndex > WINDOW_SIZE) {
windowStartIndex = lookaheadBufferEndIndex - WINDOW_SIZE;
}
System.arraycopy(inputBytes, windowStartIndex / 8, window, 0, (lookaheadBufferEndIndex - windowStartIndex + 7) / 8);
System.arraycopy(inputBytes, (lookaheadBufferEndIndex + 7) / 8, lookaheadBuffer, 0, Math.min(LOOKAHEAD_BUFFER_SIZE, (originalLength - lookaheadBufferEndIndex)));
windowStartIndex = Math.max(lookaheadBufferEndIndex - WINDOW_SIZE, 0);
}
// 写入输出文件
writeBytesToFile(outputFilePath, byteArrayOutputStream.toByteArray());
bitInputStream.close();
}
private static int getMatchLength(byte[] inputBytes, int inputIndex, byte[] lookaheadBuffer, int lookaheadBufferEndIndex, int maxLength) {
int length = 0;
while (length < maxLength && lookaheadBufferEndIndex + length < inputBytes.length && inputBytes[inputIndex + length] == lookaheadBuffer[lookaheadBufferEndIndex + length]) {
length++;
}
return length;
}
private static byte[] readBytesFromFile(String filePath) throws IOException {
FileInputStream fileInputStream = new FileInputStream(filePath);
byte[] bytes = fileInputStream.readAllBytes();
fileInputStream.close();
return bytes;
}
private static void writeBytesToFile(String filePath, byte[] bytes) throws IOException {
FileOutputStream fileOutputStream = new FileOutputStream(filePath);
fileOutputStream.write(bytes);
fileOutputStream.close();
}
private static class BitInputStream {
private final InputStream inputStream;
private int currentByte;
private int bitsRemaining;
public BitInputStream(InputStream inputStream) {
this.inputStream = inputStream;
this.currentByte = 0;
this.bitsRemaining = 0;
}
public int readBits(int numBits) throws IOException {
int result = 0;
while (numBits > 0) {
if (bitsRemaining == 0) {
currentByte = inputStream.read();
bitsRemaining = 8;
}
int bitsToRead = Math.min(numBits, bitsRemaining);
int shift = bitsRemaining - bitsToRead;
int mask = (1 << bitsToRead) - 1;
result |= (currentByte >> shift) & mask;
numBits -= bitsToRead;
bitsRemaining -= bitsToRead;
}
return result;
}
public void close() throws IOException {
inputStream.close();
}
}
private static class BitOutputStream {
private final OutputStream outputStream;
private int currentByte;
private int bitsRemaining;
public BitOutputStream(OutputStream outputStream) {
this.outputStream = outputStream;
this.currentByte = 0;
this.bitsRemaining = 8;
}
public void writeBits(int value, int numBits) throws IOException {
while (numBits > 0) {
int bitsToWrite = Math.min(numBits, bitsRemaining);
int shift = bitsRemaining - bitsToWrite;
int mask = (1 << bitsToWrite) - 1;
currentByte |= (value & mask) << shift;
numBits -= bitsToWrite;
bitsRemaining -= bitsToWrite;
if (bitsRemaining == 0) {
outputStream.write(currentByte);
currentByte = 0;
bitsRemaining = 8;
}
}
}
public void flush() throws IOException {
if (bitsRemaining < 8) {
outputStream.write(currentByte);
}
}
public void close() throws IOException {
flush();
outputStream.close();
}
}
private static class HuffmanEncoder {
private final Map<Byte, String> codeMap;
public HuffmanEncoder() {
codeMap = new HashMap<>();
}
public void encode(byte b, BitOutputStream bitOutputStream) throws IOException {
String code = codeMap.get(b);
if (code == null) {
throw new RuntimeException("Huffman code not found for byte: " + b);
}
for (char c : code.toCharArray()) {
bitOutputStream.writeBits(c - '0', 1);
}
}
public void buildCodeMap(byte[] inputBytes) {
Map<Byte, Integer> frequencyMap = new HashMap<>();
for (byte b : inputBytes) {
frequencyMap.put(b, frequencyMap.getOrDefault(b, 0) + 1);
}
PriorityQueue<Node> priorityQueue = new PriorityQueue<>();
for (Map.Entry<Byte, Integer> entry : frequencyMap.entrySet()) {
priorityQueue.offer(new Node(entry.getKey(), entry.getValue()));
}
while (priorityQueue.size() > 1) {
Node left = priorityQueue.poll();
Node right = priorityQueue.poll();
priorityQueue.offer(new Node(null, left.frequency + right.frequency, left, right));
}
Node root = priorityQueue.poll();
buildCodeMap(root, "");
}
private void buildCodeMap(Node node, String code) {
if (node.isLeaf()) {
codeMap.put(node.b, code);
} else {
buildCodeMap(node.left, code + "0");
buildCodeMap(node.right, code + "1");
}
}
private static class Node implements Comparable<Node> {
private final Byte b;
private final int frequency;
private final Node left;
private final Node right;
public Node(Byte b, int frequency) {
this.b = b;
this.frequency = frequency;
this.left = null;
this.right = null;
}
public Node(Byte b, int frequency, Node left, Node right) {
this.b = b;
this.frequency = frequency;
this.left = left;
this.right = right;
}
public boolean isLeaf() {
return left == null && right == null;
}
@Override
public int compareTo(Node other) {
return frequency - other.frequency;
}
}
}
private static class HuffmanDecoder {
private final Map<String, Byte> codeMap;
public HuffmanDecoder() {
codeMap = new HashMap<>();
}
public byte decode(BitInputStream bitInputStream) throws IOException {
StringBuilder stringBuilder = new StringBuilder();
while (true) {
int bit = bitInputStream.readBits(1);
stringBuilder.append(bit);
Byte b = codeMap.get(stringBuilder.toString());
if (b != null) {
return b;
}
}
}
public void buildCodeMap(byte[] inputBytes) {
Map<Byte, Integer> frequencyMap = new HashMap<>();
for (byte b : inputBytes) {
frequencyMap.put(b, frequencyMap.getOrDefault(b, 0) + 1);
}
PriorityQueue<Node> priorityQueue = new PriorityQueue<>();
for (Map.Entry<Byte, Integer> entry : frequencyMap.entrySet()) {
priorityQueue.offer(new Node(entry.getKey(), entry.getValue()));
}
while (priorityQueue.size() > 1) {
Node left = priorityQueue.poll();
Node right = priorityQueue.poll();
priorityQueue.offer(new Node(null, left.frequency + right.frequency, left, right));
}
Node root = priorityQueue.poll();
buildCodeMap(root, "");
}
private void buildCodeMap(Node node, String code) {
if (node.isLeaf()) {
codeMap.put(code, node.b);
} else {
buildCodeMap(node.left, code + "0");
buildCodeMap(node.right, code + "1");
}
}
private static class Node implements Comparable<Node> {
private final Byte b;
private final int frequency;
private final Node left;
private final Node right;
public Node(Byte b, int frequency) {
this.b = b;
this.frequency = frequency;
this.left = null;
this.right = null;
}
public Node(Byte b, int frequency, Node left, Node right) {
this.b = b;
this.frequency = frequency;
this.left = left;
this.right = right;
}
public boolean isLeaf() {
return left == null && right == null;
}
@Override
public int compareTo(Node other) {
return frequency - other.frequency;
}
}
}
}
```
c语言压缩文本文件字符
在C语言中,可以使用一些方法来压缩文本文件中的字符。
1. RLE压缩(Run-Length Encoding):这种方法基于统计连续出现的字符的个数。当字符连续出现时,可以将字符和次数进行编码,用更短的表示方式来代替重复的字符。例如,将"AAAABBBCCDAA"压缩为"4A3B2C1D2A"。
2. 字典压缩:创建一个字符字典,将每个字符映射到一个唯一的编码,然后用编码替代字符。可以使用哈弗曼编码或者LZW压缩算法来实现。哈弗曼编码是一种变长编码方式,出现频率高的字符使用较短的编码,出现频率低的字符使用较长的编码。LZW压缩算法则是在字典中不断添加字符组合,并用较短的编码表示较长的字符组合。
3. 使用压缩库:C语言提供了一些开源的压缩库,如zlib。这些库提供了一些函数和接口,让开发者可以在代码中调用来进行文本文件的压缩和解压缩操作。
以上是一些常用的方法来压缩文本文件中的字符,开发者可以根据需求选择合适的方法。在实际应用中,还需要考虑压缩后的文件大小、解压缩的速度以及压缩算法的复杂度等因素。