An example of C + + data structure file compression (Huffman tree)
- 2020-05-26 09:49:57
- OfStack
An example of file compression (Huffman tree) for C + + data structures
Summary:
Project introduction: Huffman coding method is used to compress the files, and the compressed files can be decompressed
Development environment: windows vs2013
Project overview:
1. The compression
a. Reads the file and forms a Huffman tree for each character, the number and weight of that character
b. Huffman tree is constructed with a small heap. The node pointer with fewer characters exists at the top of the heap, while the node pointer with more characters exists at the bottom of the heap
c. Take two Numbers at a time at the top of the heap and add them to the heap until the heap is finished, at which point the Huffman tree is built
d. Gets the Huffman encoding from the Huffman tree, and then gets the encoding of the characters that appear based on the entire character array
e. After acquiring the encoding, write the encoding string to the compressed file every time the encoding is filled to 8 bits (value can process the encoding 1 and it, and only move 0 bits)
f. Write the configuration file, count each character and its occurrence times, and save it to the configuration file in the form of "character +','+ times"
2. Unzip
a. Read the configuration file and count the number of all characters
b. Build a Huffman tree, read the unzipped file, and write the characters contained in the node of the encoded character you read to the unzipped file until you have finished the zipped file
c. Complete compression and decompression for testing small files and large files
Example code:
#pragma once
#include<vector>
template<class T>
struct Less
{
bool operator()(const T& l, const T& r) const
{
return l < r;
}
};
template<class T>
struct Greater
{
bool operator()(const T& l, const T& r) const
{
return l > r;
}
};
template<class T, class Compare>
class Heap
{
public:
Heap()
{}
Heap(T* array, size_t n) // Building the heap
{
_array.reserve(n);
for (size_t i = 0; i < n; i++)
{
_array.push_back(array[i]);
}
for (int i = (_array.size() - 2) >> 1; i >= 0; --i)
{
_AdjustDown(i);
}
}
const T& Top()const
{
return _array[0];
}
void Push(const T& x)
{
_array.push_back(x);
_AdjustUp(_array.size() - 1);
}
size_t Size()
{
return _array.size();
}
void Pop()
{
assert(_array.size() > 0);
swap(_array[0], _array[_array.size() - 1]);
_array.pop_back();
_AdjustDown(0);
}
bool Empty()
{
return _array.size() == 0;
}
void Print()
{
for (size_t i = 0; i < _array.size(); ++i)
{
cout << _array[i] << " ";
}
cout << endl;
}
protected:
void _AdjustUp(int child) // To raise
{
Compare ComFunc;
int parent = (child - 1) >> 1;
while (child)
{
if (ComFunc(_array[child], _array[parent]))
{
swap(_array[child], _array[parent]);
child = parent;
parent = (child - 1) >> 1;
}
else
{
break;
}
}
}
void _AdjustDown(int root) // cut
{
Compare ComFunc;
int parent = root;
int child = root * 2 + 1;
while (child < _array.size())
{
if (child + 1 < _array.size() && ComFunc(_array[child + 1], _array[child]))
{
++child;
}
if (ComFunc(_array[child], _array[parent]))
{
swap(_array[child], _array[parent]);
parent = child;
child = parent * 2 + 1;
}
else
{
break;
}
}
}
protected:
vector<T> _array;
};
void TestHeap()
{
int a[] = { 10, 11, 13, 12, 16, 18, 15, 17, 14, 19 };
//Heap<int> heap(a, sizeof(a) / sizeof(a[0]));
//Heap<int, Less<int>> heap(a, sizeof(a) / sizeof(a[0]));
Heap<int, Greater<int>> heap(a, sizeof(a) / sizeof(a[0]));
heap.Print();
heap.Push(25);
heap.Print();
heap.Pop();
heap.Print();
}
#pragma once
#include"Heap.h"
template<class T>
struct HuffmanTreeNode
{
HuffmanTreeNode<T>* _left;
HuffmanTreeNode<T>* _right;
HuffmanTreeNode<T>* _parent;
T _w; // A weight
HuffmanTreeNode(const T& x)
:_w(x)
, _left(NULL)
, _right(NULL)
, _parent(NULL)
{}
};
template<class T>
class HuffmanTree
{
typedef HuffmanTreeNode<T> Node;
public:
HuffmanTree()
:_root(NULL)
{}
HuffmanTree(T* a, size_t n, const T& invalid = T()) // Construct a Huffman tree
{
struct Compare
{
bool operator()(Node* l, Node* r) const
{
return l->_w < r->_w;
}
};
Heap<Node*, Compare> minHeap;
for (size_t i = 0; i < n; ++i)
{
if (a[i] != invalid)
{
minHeap.Push(new Node(a[i]));
}
}
while (minHeap.Size() > 1)
{
Node* left = minHeap.Top();
minHeap.Pop();
Node* right = minHeap.Top();
minHeap.Pop();
Node* parent = new Node(left->_w + right->_w);
parent->_left = left;
parent->_right = right;
left->_parent = parent;
right->_parent = parent;
minHeap.Push(parent);
}
_root = minHeap.Top();
}
Node* GetRoot()
{
return _root;
}
~HuffmanTree()
{
_Destory(_root);
}
protected:
void _Destory(Node* root)
{
if (root == NULL)
{
return;
}
_Destory(root->_left);
_Destory(root->_right);
delete root;
}
HuffmanTree(const HuffmanTree<T>& tree);
HuffmanTree& operator=(const HuffmanTree<T>& tree);
protected:
Node* _root;
};
void TestHuffmanTree()
{<pre code_snippet_id="2340790" snippet_file_name="blog_20170418_2_3778260" name="code" class="cpp">#pragma once
#include<iostream>
using namespace std;
#include<assert.h>
#include"HuffmanTree.h"
typedef long long LongType;
struct CharInfo
{
unsigned char _ch; // character
LongType _count; // The number of times a character appears
string _code; //huffman coding
CharInfo(LongType count = 0)
:_count(count)
, _ch(0)
, _code("")
{}
bool operator<(const CharInfo& info) const
{
return _count < info._count;
}
CharInfo operator+(const CharInfo& info)
{
return CharInfo(_count + info._count);
}
bool operator!=(const CharInfo& info)const
{
return _count != info._count;
}
};
struct CountInfo
{
unsigned char _ch; // character
LongType _count; // The number of times a character appears
};
class FileCompress
{
public:
FileCompress()
{
for (size_t i = 0; i < 256; ++i)
{
_info[i]._ch = i;
}
}
void Compress(const char* filename)
{
assert(filename);
// The number of occurrences of the characters is counted 2 Read and write in base mode
FILE* fout = fopen(filename, "rb");
assert(fout);
int ch = fgetc(fout);
string CompressFile = filename;
CompressFile += ".huffman";
FILE* fin = fopen(CompressFile.c_str(), "wb");
assert(fin);
CountInfo info;
while (ch != EOF)
{
_info[(unsigned char)ch]._count++;
ch = fgetc(fout);
}
// build huffman tree
CharInfo invalid;
invalid._count = 0;
HuffmanTree<CharInfo> tree(_info, 256, invalid);
// generate huffman code
GetHuffmanCode(tree.GetRoot());
// The compression
// Write configuration files
for (size_t i = 0; i < 256; ++i)
{
if (_info[i]._count)
{
info._ch = _info[i]._ch;
info._count = _info[i]._count;
fwrite(&info, sizeof(info), 1, fin);
}
}
info._count = -1;
fwrite(&info, sizeof(info), 1, fin);
fseek(fout, 0, SEEK_SET); // Return to the start of the file
ch = fgetc(fout);
char value = 0; //2 Into the system
int pos = 0; // Left shift number
while (ch != EOF)
{
string& code = _info[(unsigned char)ch]._code;
size_t i = 0;
for (i = 0; i < code.size(); ++i)
{
value <<= 1;
++pos;
if (code[i] == '1')
{
value |= 1;
}
if (pos == 8) // full 8 Bits are written into the file
{
fputc(value, fin);
value = 0;
pos = 0;
}
}
ch = fgetc(fout);
}
if (pos)
{
value <<= (8 - pos);
fputc(value, fin);
}
fclose(fin);
fclose(fout);
}
void GetHuffmanCode(HuffmanTreeNode<CharInfo>* root) //huffman coding
{
if (root == NULL)
{
return;
}
if (root->_left == NULL && root->_right == NULL)
{
HuffmanTreeNode<CharInfo> *parent, *cur;
cur = root;
parent = cur->_parent;
string& code = _info[(unsigned char)root->_w._ch]._code;
while (parent)
{
if (cur == parent->_left)
{
code += '0';
}
else
{
code += '1';
}
cur = parent;
parent = cur->_parent;
}
reverse(code.begin(), code.end());
}
GetHuffmanCode(root->_left);
GetHuffmanCode(root->_right);
}
// Unpack the
void UnCompress(const char* filename)
{
assert(filename);
string UnCompressFile = filename;
size_t index = UnCompressFile.rfind('.');
assert(index != string::npos);
UnCompressFile = UnCompressFile.substr(0, index);
UnCompressFile += ".unhuffman";
FILE* fout = fopen(filename, "rb");
assert(fout);
FILE* fin = fopen(UnCompressFile.c_str(), "wb");
assert(fin);
CountInfo info;
fread(&info, sizeof(CountInfo), 1, fout);
// Read configuration information
while (1)
{
fread(&info, sizeof(CountInfo), 1, fout);
if (info._count == -1)
{
break;
}
_info[(unsigned char)info._ch]._ch = info._ch;
_info[(unsigned char)info._ch]._count = info._count;
}
// The reconstruction huffman The tree
CharInfo invalid;
invalid._count = 0;
HuffmanTree<CharInfo> tree(_info, 256, invalid);
HuffmanTreeNode<CharInfo>* root = tree.GetRoot();
HuffmanTreeNode<CharInfo>* cur = root;
LongType count = root->_w._count;
int value = fgetc(fout);
if (value == EOF) // only 1 Kind of character
{
if (info._ch != 0)
{
while (cur->_w._count--)
{
fputc(cur->_w._ch, fin);
}
}
}
else
{
while (value != EOF)
{
int pos = 7;
char test = 1;
while (pos >= 0)
{
if (value & (test << pos))
{
cur = cur->_right;
}
else
{
cur = cur->_left;
}
if (cur->_left == NULL && cur->_right == NULL)
{
fputc(cur->_w._ch, fin);
cur = root;
if (--count == 0)
{
break;
}
}
--pos;
}
value = fgetc(fout);
}
}
fclose(fout);
fclose(fin);
}
protected:
CharInfo _info[256]; // All character information
};
void TestFileCompress()
{
FileCompress fc;
//fc.Compress(" The experiment .doc");
//fc.UnCompress(" The experiment .doc.huffman");
//fc.Compress("qq.JPG");
//fc.UnCompress("qq.JPG.huffman");
//fc.Compress("www.MP3");
//fc.UnCompress("www.MP3.huffman");
fc.Compress("yppppp.txt");
fc.UnCompress("yppppp.txt.huffman");
}</pre><br>
int array[10] = { 2, 4, 6, 9, 7, 8, 5, 0, 1, 3 };HuffmanTree<int> t(array, 10);}
<pre></pre>
<p></p>
<pre></pre>
<p></p>
<p></p>
<p></p>
<pre code_snippet_id="2340790" snippet_file_name="blog_20170418_3_1128302" name="code" class="cpp">#include <iostream>
#include <assert.h>
#include <windows.h>
using namespace std;
#include"Heap.h"
#include"HuffmanTree.h"
#include"FileCompress.h"
int main()
{
//TestHeap();
TestHuffmanTree();
TestFileCompress();
system("pause");
return 0;
}</pre><br>
<br>
<p></p>
<p><br>
</p>
<p><br>
</p>
<link rel="stylesheet" href="http://static.blog.csdn.net/public/res-min/markdown_views.css?v=1.0" rel="external nofollow" >
The above is Huffman tree example details, if you have questions please leave a message or to the site community communication, thank you for reading, hope to help you, thank you for the support of the site!