An example of C + + data structure file compression (Huffman tree)

  • 2020-05-26 09:49:57
  • OfStack

An example of file compression (Huffman tree) for C + + data structures

Summary:

Project introduction: Huffman coding method is used to compress the files, and the compressed files can be decompressed

Development environment: windows vs2013

Project overview:

1. The compression

a. Reads the file and forms a Huffman tree for each character, the number and weight of that character

b. Huffman tree is constructed with a small heap. The node pointer with fewer characters exists at the top of the heap, while the node pointer with more characters exists at the bottom of the heap

c. Take two Numbers at a time at the top of the heap and add them to the heap until the heap is finished, at which point the Huffman tree is built

d. Gets the Huffman encoding from the Huffman tree, and then gets the encoding of the characters that appear based on the entire character array

e. After acquiring the encoding, write the encoding string to the compressed file every time the encoding is filled to 8 bits (value can process the encoding 1 and it, and only move 0 bits)

f. Write the configuration file, count each character and its occurrence times, and save it to the configuration file in the form of "character +','+ times"

2. Unzip

a. Read the configuration file and count the number of all characters

b. Build a Huffman tree, read the unzipped file, and write the characters contained in the node of the encoded character you read to the unzipped file until you have finished the zipped file

c. Complete compression and decompression for testing small files and large files

Example code:


#pragma once 
#include<vector> 
 
template<class T> 
struct Less 
{ 
  bool operator()(const T& l, const T& r) const 
  { 
    return l < r; 
  } 
}; 
 
template<class T> 
struct Greater 
{ 
  bool operator()(const T& l, const T& r) const 
  { 
    return l > r; 
  } 
}; 
 
template<class T, class Compare> 
class Heap 
{ 
public: 
  Heap() 
  {} 
 
  Heap(T* array, size_t n)   // Building the heap  
  { 
    _array.reserve(n); 
 
    for (size_t i = 0; i < n; i++) 
    { 
      _array.push_back(array[i]); 
    } 
 
    for (int i = (_array.size() - 2) >> 1; i >= 0; --i) 
    { 
      _AdjustDown(i); 
    } 
  } 
 
  const T& Top()const 
  { 
    return _array[0]; 
  } 
 
  void Push(const T& x) 
  { 
    _array.push_back(x); 
    _AdjustUp(_array.size() - 1); 
  } 
 
  size_t Size() 
  { 
    return _array.size(); 
  } 
 
  void Pop() 
  { 
    assert(_array.size() > 0); 
    swap(_array[0], _array[_array.size() - 1]); 
    _array.pop_back(); 
    _AdjustDown(0); 
  } 
 
  bool Empty() 
  { 
    return _array.size() == 0; 
  } 
 
  void Print() 
  { 
    for (size_t i = 0; i < _array.size(); ++i) 
    { 
      cout << _array[i] << " "; 
    } 
    cout << endl; 
  } 
 
protected: 
  void _AdjustUp(int child)  // To raise  
  { 
    Compare ComFunc; 
    int parent = (child - 1) >> 1; 
    while (child) 
    { 
      if (ComFunc(_array[child], _array[parent])) 
      { 
        swap(_array[child], _array[parent]); 
        child = parent; 
        parent = (child - 1) >> 1; 
      } 
      else 
      { 
        break; 
      } 
    } 
  } 
 
 
 
 
  void _AdjustDown(int root)   // cut  
  { 
    Compare ComFunc; 
    int parent = root; 
    int child = root * 2 + 1; 
    while (child < _array.size()) 
    { 
      if (child + 1 < _array.size() && ComFunc(_array[child + 1], _array[child])) 
      { 
        ++child; 
      } 
 
      if (ComFunc(_array[child], _array[parent])) 
      { 
        swap(_array[child], _array[parent]); 
        parent = child; 
        child = parent * 2 + 1; 
      } 
      else 
      { 
        break; 
      } 
    } 
  } 
 
 
 
protected: 
  vector<T> _array; 
 
 
}; 
 
 
 
 
void TestHeap() 
{ 
  int a[] = { 10, 11, 13, 12, 16, 18, 15, 17, 14, 19 }; 
  //Heap<int> heap(a, sizeof(a) / sizeof(a[0])); 
  //Heap<int, Less<int>> heap(a, sizeof(a) / sizeof(a[0])); 
  Heap<int, Greater<int>> heap(a, sizeof(a) / sizeof(a[0])); 
  heap.Print(); 
  heap.Push(25); 
  heap.Print(); 
  heap.Pop(); 
  heap.Print(); 
} 





#pragma once 
#include"Heap.h" 
 
template<class T> 
struct HuffmanTreeNode 
{ 
  HuffmanTreeNode<T>* _left; 
  HuffmanTreeNode<T>* _right; 
  HuffmanTreeNode<T>* _parent; 
  T _w;       // A weight  
 
 
  HuffmanTreeNode(const T& x) 
    :_w(x) 
    , _left(NULL) 
    , _right(NULL) 
    , _parent(NULL) 
  {} 
 
}; 
 
template<class T> 
class HuffmanTree 
{ 
  typedef HuffmanTreeNode<T> Node; 
public: 
  HuffmanTree() 
    :_root(NULL) 
  {} 
 
  HuffmanTree(T* a, size_t n, const T& invalid = T())  // Construct a Huffman tree  
  { 
    struct Compare 
    { 
      bool operator()(Node* l, Node* r) const 
      { 
        return l->_w < r->_w; 
      } 
    }; 
 
    Heap<Node*, Compare> minHeap; 
    for (size_t i = 0; i < n; ++i) 
    { 
      if (a[i] != invalid) 
      { 
        minHeap.Push(new Node(a[i])); 
      } 
    } 
 
    while (minHeap.Size() > 1) 
    { 
      Node* left = minHeap.Top(); 
      minHeap.Pop(); 
      Node* right = minHeap.Top(); 
      minHeap.Pop(); 
      Node* parent = new Node(left->_w + right->_w); 
      parent->_left = left; 
      parent->_right = right; 
      left->_parent = parent; 
      right->_parent = parent; 
      minHeap.Push(parent); 
    } 
    _root = minHeap.Top(); 
  } 
 
 
  Node* GetRoot() 
  { 
    return _root; 
  } 
 
  ~HuffmanTree() 
  { 
    _Destory(_root); 
  } 
 
 
 
 
 
protected: 
  void _Destory(Node* root) 
  { 
    if (root == NULL) 
    { 
      return; 
    } 
 
    _Destory(root->_left); 
    _Destory(root->_right); 
    delete root; 
  } 
 
  HuffmanTree(const HuffmanTree<T>& tree); 
  HuffmanTree& operator=(const HuffmanTree<T>& tree); 
 
protected: 
  Node* _root; 
 
}; 
 
 
void TestHuffmanTree() 
{<pre code_snippet_id="2340790" snippet_file_name="blog_20170418_2_3778260" name="code" class="cpp">#pragma once 
 
#include<iostream> 
using namespace std; 
#include<assert.h> 
#include"HuffmanTree.h" 
 
 
typedef long long LongType; 
struct CharInfo    
{ 
  unsigned char _ch;     // character  
  LongType _count;     // The number of times a character appears  
  string _code;      //huffman coding  
 
  CharInfo(LongType count = 0) 
    :_count(count) 
    , _ch(0) 
    , _code("") 
  {} 
 
 
  bool operator<(const CharInfo& info) const 
  { 
    return _count < info._count; 
  } 
 
  CharInfo operator+(const CharInfo& info) 
  { 
    return CharInfo(_count + info._count); 
  } 
 
  bool operator!=(const CharInfo& info)const 
  { 
    return _count != info._count; 
  } 
}; 
 
 
struct CountInfo 
{ 
  unsigned char _ch;    // character  
  LongType _count;     // The number of times a character appears  
}; 
 
 
 
 
class FileCompress 
{ 
public: 
  FileCompress() 
  { 
    for (size_t i = 0; i < 256; ++i) 
    { 
      _info[i]._ch = i; 
    } 
  } 
 
  void Compress(const char* filename) 
  { 
    assert(filename); 
     
    // The number of occurrences of the characters is counted 2 Read and write in base mode  
    FILE* fout = fopen(filename, "rb"); 
    assert(fout); 
    int ch = fgetc(fout); 
 
    string CompressFile = filename; 
    CompressFile += ".huffman"; 
    FILE* fin = fopen(CompressFile.c_str(), "wb"); 
    assert(fin); 
    CountInfo info; 
 
 
    while (ch != EOF) 
    { 
      _info[(unsigned char)ch]._count++; 
      ch = fgetc(fout); 
    } 
 
    // build huffman tree 
    CharInfo invalid; 
    invalid._count = 0; 
    HuffmanTree<CharInfo> tree(_info, 256, invalid); 
 
    // generate huffman code 
    GetHuffmanCode(tree.GetRoot()); 
 
    // The compression  
    // Write configuration files  
    for (size_t i = 0; i < 256; ++i) 
    { 
      if (_info[i]._count) 
      { 
        info._ch = _info[i]._ch; 
        info._count = _info[i]._count; 
        fwrite(&info, sizeof(info), 1, fin); 
      } 
    } 
 
    info._count = -1; 
    fwrite(&info, sizeof(info), 1, fin); 
 
    fseek(fout, 0, SEEK_SET);   // Return to the start of the file  
    ch = fgetc(fout); 
    char value = 0;     //2 Into the system  
    int pos = 0;    // Left shift number  
    while (ch != EOF) 
    { 
      string& code = _info[(unsigned char)ch]._code; 
      size_t i = 0; 
      for (i = 0; i < code.size(); ++i) 
      { 
        value <<= 1; 
        ++pos; 
        if (code[i] == '1') 
        { 
          value |= 1; 
        } 
        if (pos == 8)       // full 8 Bits are written into the file  
        { 
          fputc(value, fin); 
          value = 0; 
          pos = 0; 
        } 
      } 
      ch = fgetc(fout); 
    } 
    if (pos) 
    { 
      value <<= (8 - pos); 
      fputc(value, fin); 
    } 
    fclose(fin); 
    fclose(fout); 
  } 
 
  void GetHuffmanCode(HuffmanTreeNode<CharInfo>* root)    //huffman coding  
  { 
    if (root == NULL) 
    { 
      return; 
    } 
 
    if (root->_left == NULL && root->_right == NULL) 
    { 
      HuffmanTreeNode<CharInfo> *parent, *cur; 
      cur = root; 
      parent = cur->_parent; 
      string& code = _info[(unsigned char)root->_w._ch]._code; 
      while (parent) 
      { 
        if (cur == parent->_left) 
        { 
          code += '0'; 
        } 
        else 
        { 
          code += '1'; 
        } 
        cur = parent; 
        parent = cur->_parent; 
      } 
      reverse(code.begin(), code.end()); 
    } 
    GetHuffmanCode(root->_left); 
    GetHuffmanCode(root->_right); 
  } 
 
  // Unpack the  
  void UnCompress(const char* filename) 
  { 
    assert(filename); 
    string UnCompressFile = filename; 
    size_t index = UnCompressFile.rfind('.'); 
    assert(index != string::npos); 
    UnCompressFile = UnCompressFile.substr(0, index); 
    UnCompressFile += ".unhuffman"; 
    FILE* fout = fopen(filename, "rb"); 
    assert(fout); 
    FILE* fin = fopen(UnCompressFile.c_str(), "wb"); 
    assert(fin); 
    CountInfo info; 
    fread(&info, sizeof(CountInfo), 1, fout); 
 
    // Read configuration information  
    while (1) 
    { 
      fread(&info, sizeof(CountInfo), 1, fout); 
      if (info._count == -1) 
      { 
        break; 
      } 
      _info[(unsigned char)info._ch]._ch = info._ch; 
      _info[(unsigned char)info._ch]._count = info._count; 
    } 
 
    // The reconstruction huffman The tree  
    CharInfo invalid; 
    invalid._count = 0; 
    HuffmanTree<CharInfo> tree(_info, 256, invalid); 
    HuffmanTreeNode<CharInfo>* root = tree.GetRoot(); 
    HuffmanTreeNode<CharInfo>* cur = root; 
    LongType count = root->_w._count; 
    int value = fgetc(fout); 
 
    if (value == EOF)       // only 1 Kind of character  
    { 
      if (info._ch != 0) 
      { 
        while (cur->_w._count--) 
        { 
          fputc(cur->_w._ch, fin); 
        } 
      } 
    } 
    else 
    { 
      while (value != EOF) 
      { 
        int pos = 7; 
        char test = 1; 
        while (pos >= 0) 
        { 
          if (value & (test << pos)) 
          { 
            cur = cur->_right; 
          } 
          else 
          { 
            cur = cur->_left; 
          } 
          if (cur->_left == NULL && cur->_right == NULL) 
          { 
            fputc(cur->_w._ch, fin); 
            cur = root; 
            if (--count == 0) 
            { 
              break; 
            } 
          } 
          --pos; 
        } 
        value = fgetc(fout); 
      } 
    } 
 
    fclose(fout); 
    fclose(fin); 
  } 
 
protected: 
  CharInfo _info[256];   // All character information  
}; 
 
void TestFileCompress() 
{ 
  FileCompress fc; 
  //fc.Compress(" The experiment .doc"); 
  //fc.UnCompress(" The experiment .doc.huffman"); 
  //fc.Compress("qq.JPG"); 
  //fc.UnCompress("qq.JPG.huffman"); 
  //fc.Compress("www.MP3"); 
  //fc.UnCompress("www.MP3.huffman"); 
 
  fc.Compress("yppppp.txt"); 
  fc.UnCompress("yppppp.txt.huffman"); 
}</pre><br> 
int array[10] = { 2, 4, 6, 9, 7, 8, 5, 0, 1, 3 };HuffmanTree<int> t(array, 10);} 
<pre></pre> 
<p></p> 
<pre></pre> 
<p></p> 
<p></p> 
<p></p> 
<pre code_snippet_id="2340790" snippet_file_name="blog_20170418_3_1128302" name="code" class="cpp">#include <iostream> 
#include <assert.h> 
#include <windows.h> 
using namespace std; 
 
#include"Heap.h" 
#include"HuffmanTree.h" 
#include"FileCompress.h" 
 
int main() 
{ 
  //TestHeap();   
  TestHuffmanTree(); 
  TestFileCompress(); 
 
 
  system("pause"); 
  return 0; 
}</pre><br> 
<br> 
<p></p> 
<p><br> 
</p> 
<p><br> 
</p> 
<link rel="stylesheet" href="http://static.blog.csdn.net/public/res-min/markdown_views.css?v=1.0" rel="external nofollow" > 

The above is Huffman tree example details, if you have questions please leave a message or to the site community communication, thank you for reading, hope to help you, thank you for the support of the site!


Related articles: