用C++实现bsdiff算法。
时间: 2023-10-22 10:08:09 浏览: 179
08_24_C++ 差分算法的分析
5星 · 资源好评率100%
bsdiff算法是一种用于生成两个二进文件之间差异的算法,常用于软件升级和增量更新。下面是一个C++实现的示例代码:
```c++
#include <fstream>
#include <vector>
typedef unsigned char uint8_t;
typedef unsigned int uint32_t;
typedef unsigned long long uint64_t;
const uint64_t MAX_LEN = 0xFFFFFFFFFFFFFFFF;
class bsdiff {
public:
static bool diff(const char* old_file, const char* new_file, const char* patch_file);
static bool patch(const char* old_file, const char* new_file, const char* patch_file);
private:
static void split(uint64_t* sa, uint8_t* old_data, uint64_t old_size);
static uint64_t search(uint8_t* new_data, uint64_t new_size, uint8_t* old_data, uint64_t old_size, uint64_t pos, uint64_t len, uint64_t* offset);
static void encode(std::ofstream& fout, uint64_t x);
static void copy(std::ofstream& fout, uint8_t* data, uint64_t size);
};
bool bsdiff::diff(const char* old_file, const char* new_file, const char* patch_file) {
std::ifstream old_f(old_file, std::ios::binary);
if (!old_f) return false;
old_f.seekg(0, std::ios::end);
uint64_t old_size = old_f.tellg();
old_f.seekg(0, std::ios::beg);
std::vector<uint8_t> old_data(old_size);
old_f.read(reinterpret_cast<char*>(&old_data[0]), old_size);
old_f.close();
std::ifstream new_f(new_file, std::ios::binary);
if (!new_f) return false;
new_f.seekg(0, std::ios::end);
uint64_t new_size = new_f.tellg();
new_f.seekg(0, std::ios::beg);
std::vector<uint8_t> new_data(new_size);
new_f.read(reinterpret_cast<char*>(&new_data[0]), new_size);
new_f.close();
std::ofstream patch_f(patch_file, std::ios::binary);
if (!patch_f) return false;
uint64_t* sa = new uint64_t[(old_size + 1) / 2];
split(sa, &old_data[0], old_size);
uint64_t i = 0;
uint64_t len = 0;
uint64_t pos = 0;
uint64_t last_offset = 0;
while (i < new_size) {
uint64_t offset = 0;
pos = search(&new_data[0], new_size, &old_data[0], old_size, sa[i], old_size - sa[i], &offset);
if (i + pos - last_offset >= MAX_LEN || pos == old_size) {
encode(patch_f, i - last_offset);
encode(patch_f, pos - last_offset);
copy(patch_f, &new_data[i], pos - last_offset);
last_offset = pos;
}
i += pos - sa[i];
}
encode(patch_f, i - last_offset);
encode(patch_f, new_size - last_offset);
copy(patch_f, &new_data[i], new_size - last_offset);
delete[] sa;
patch_f.close();
return true;
}
bool bsdiff::patch(const char* old_file, const char* new_file, const char* patch_file) {
std::ifstream old_f(old_file, std::ios::binary);
if (!old_f) return false;
old_f.seekg(0, std::ios::end);
uint64_t old_size = old_f.tellg();
old_f.seekg(0, std::ios::beg);
std::vector<uint8_t> old_data(old_size);
old_f.read(reinterpret_cast<char*>(&old_data[0]), old_size);
old_f.close();
std::ifstream patch_f(patch_file, std::ios::binary);
if (!patch_f) return false;
std::ofstream new_f(new_file, std::ios::binary);
if (!new_f) return false;
uint64_t old_pos = 0;
uint64_t new_pos = 0;
uint64_t cmd = 0;
uint64_t len = 0;
while (patch_f) {
cmd = 0;
len = 0;
patch_f.read(reinterpret_cast<char*>(&cmd), sizeof(uint64_t));
patch_f.read(reinterpret_cast<char*>(&len), sizeof(uint64_t));
if (patch_f.eof()) break;
if (cmd > 0) {
std::vector<uint8_t> diff_data(len);
patch_f.read(reinterpret_cast<char*>(&diff_data[0]), len);
for (uint64_t i = 0; i < len; i++) {
new_f.put(old_data[old_pos + i] + diff_data[i]);
}
old_pos += len;
new_pos += len;
} else {
new_pos += len;
}
}
patch_f.close();
new_f.close();
return true;
}
void bsdiff::split(uint64_t* sa, uint8_t* old_data, uint64_t old_size) {
uint64_t i = 0;
uint64_t j = 0;
uint64_t k = 0;
uint64_t x = 0;
uint64_t y = 0;
uint64_t tmp = 0;
uint64_t* v = new uint64_t[old_size];
for (i = 0; i < old_size; i++) {
v[i] = i;
}
for (i = 0; i < old_size; i++) {
sa[i] = v[0];
v[0] = v[1];
x = v[1];
for (j = 1; j < old_size - i - 1; j++) {
y = v[j + 1];
if (old_data[sa[i] + j] > old_data[x + j]) {
v[j] = x;
x = y;
} else if (old_data[sa[i] + j] > old_data[y + j]) {
v[j] = sa[i] + j;
x = y;
} else {
v[j] = y;
}
}
v[old_size - i - 2] = x;
}
for (i = 0; i < old_size; i++) {
sa[v[i]] = i;
}
delete[] v;
}
uint64_t bsdiff::search(uint8_t* new_data, uint64_t new_size, uint8_t* old_data, uint64_t old_size, uint64_t pos, uint64_t len, uint64_t* offset) {
uint64_t start = 0;
uint64_t end = old_size;
uint64_t x = 0;
uint64_t y = 0;
uint64_t last = 0;
while (start < end) {
x = start + (end - start) / 2;
last = (x > *offset) ? x - *offset : *offset - x;
y = 0;
while (y < len && x + y < old_size && new_data[pos + y] == old_data[x + y]) {
y++;
}
if (y == len) {
*offset = x;
return x - *offset + len;
}
if (y > last) {
end = x;
} else {
start = x + 1;
}
}
return 0;
}
void bsdiff::encode(std::ofstream& fout, uint64_t x) {
uint8_t buf[10];
uint32_t i = 0;
while (x >= 0x80) {
buf[i++] = (x & 0x7F) | 0x80;
x >>= 7;
}
buf[i++] = x & 0x7F;
for (uint32_t j = 0; j < i; j++) {
fout.put(buf[j]);
}
}
void bsdiff::copy(std::ofstream& fout, uint8_t* data, uint64_t size) {
for (uint64_t i = 0; i < size; i++) {
fout.put(data[i]);
}
}
```
该实现使用了STL中的vector和fstream库,代码较为简洁易读。其中,split函数用于计算old_data数组的后缀数组,search函数用于在old_data数组中查找与new_data数组匹配的最长子串。encode和copy函数用于将数据写入输出流中。diff函数用于生成差异文件,patch函数用于根据差异文件生成新文件。
阅读全文