An example of java implementing Huffman compression

  • 2020-08-22 22:05:54
  • OfStack

Principle of Huffman compression:

The 8-bit 01 string is converted to a shorter Huffman code by counting the frequency of each byte in the file.

Among them, Huffman encoding is constructed according to the frequency of occurrence of bytes in the file. The higher the frequency of occurrence of bytes, the shorter the path length;

The lower the frequency of the byte, the longer the path length, so as to achieve the purpose of compression.

How to construct Huffman trees

1. Define the byte classes

My byte class defines 1 property


   public int data;// Node data  
public int weight;// The weight of the node  
public int point;// The left and right position of the node  0- On the left  1- right  
private NodeData parent;// Parent node reference  
private NodeData left;// Left node reference  
private NodeData right;// Right node reference  

2. Build Huffman trees

1. Define an array to store byte information: int array_Bytes[256];

Where the subscript [0,256) of the array represents byte values (1 byte 8-bit, whose value is in the range [0,256)); the array stores the number of occurrences of the byte.

2. Traverse the file to be compressed and count the occurrence times of bytes.


 InputStream data = new FileInputStream(path); 
 /********  Number of characters in the file  ********/ 
 int number = data.available(); 
 for (int i = 0; i < number; i++) { 
int b = data.read(); 
array_Bytes[b] ++; 
 } 
data.close(); 

3. Store the byte class object on the priority queue

4. Use HashMap to construct code table

Huffman compression code is as follows:

Class 1 byte


package compressFile; 
/** 
 *  Node data  
 *  Functions: Define data, and its Huffman coding  
 * @author Andrew 
 * 
 */ 
public class NodeData { 
  public NodeData(){ 
     
  } 
  public int data;// Node data  
  public int weight;// The weight of the node  
  public int point;// The left and right position of the node  0- On the left  1- right  
  private NodeData parent; 
  private NodeData left; 
  private NodeData right; 
   
  public int getData(){ 
    return data; 
  } 
  public NodeData getParent() { 
    return parent; 
  } 
  public void setParent(NodeData parent) { 
    this.parent = parent; 
  } 
  public NodeData getLeft() { 
    return left; 
  } 
  public void setLeft(NodeData left) { 
    this.left = left; 
  } 
  public NodeData getRight() { 
    return right; 
  } 
  public void setRight(NodeData right) { 
    this.right = right; 
  } 
} 

2. Count classes of bytes and generate code tables


package compressFile; 
 
import java.io.BufferedInputStream; 
import java.io.FileInputStream; 
import java.io.IOException; 
import java.io.InputStream; 
import java.util.ArrayList; 
import java.util.Comparator; 
import java.util.HashMap; 
import java.util.List; 
import java.util.Map; 
import java.util.PriorityQueue; 
 
 
 
/** 
 *  Counts the number of bytes per file specified   Function: Define array, write the byte type and number in the file to array  
 *  Create priority queues and Huffman trees  
 * @author Andrew 
 * 
 */ 
public class StatisticBytes { 
 
   
  /** 
   *  The first 1 Step:  
   *  The method of counting bytes in a file  
   * 
   * @param path 
   *       The path of the file being counted  
   * @return  Byte count array  
   */ 
  private int[] statistic(String path) { 
    /****** An array that stores byte of data -- The index value represents the byte type - The store value represents the weight value ******/ 
    int[] array_Bytes = new int[256]; 
    try { 
      InputStream data = new FileInputStream(path); 
      BufferedInputStream buffered = new BufferedInputStream(data); 
      /********  Number of characters in the file  ********/ 
      int number = data.available(); 
      System.out.println(" Number of Bytes" "+number); 
      for (int i = 0; i < number; i++) { 
        int b = data.read(); 
        array_Bytes[b] ++; 
      } 
       
      data.close(); 
    } catch (IOException e) { 
      e.printStackTrace(); 
    } 
    return array_Bytes; 
  } 
  /** 
   *  The first 2 Step:  
   *  Create a priority queue based on the byte array of statistics  
   * @param array  Statistics an array of file bytes  
   * @return     Priority queue  
   */ 
  private PriorityQueue<NodeData> createQueue(int[] array){ 
    // Define a priority queue to sort data by weight from small to large  
    PriorityQueue<NodeData> queue =  
        new PriorityQueue<NodeData>(array.length,new Comparator<NodeData>(){ 
      public int compare(NodeData o1, NodeData o2) { 
        return o1.weight - o2.weight; 
      } 
    }); 
     
    for(int i=0; i<array.length; i++){ 
      if(0 != array[i]){ 
        NodeData node = new NodeData(); 
        node.data = i;// Sets the data for this node  
        node.weight = array[i];// Sets the weight of the node  
        queue.add(node); 
      } 
    } 
    return queue; 
  } 
  /** 
   *  The first 3 Step:  
   *  Create a Huffman tree based on the priority queue  
   * @param queue   Priority queue   
   * @return      Huffman root node  
   */ 
  private NodeData createTree(PriorityQueue<NodeData> queue){ 
    while(queue.size() > 1){ 
       
      NodeData left = queue.poll();// Get the left node  
      NodeData right = queue.poll();// Get the right node  
       
      NodeData root = new NodeData();// Create a new node  
      root.weight = left.weight + right.weight; 
       
      root.setLeft(left); 
      root.setRight(right); 
      left.setParent(root); 
      right.setParent(root); 
       
      left.point = 0; 
      right.point = 1; 
       
      queue.add(root); 
    } 
    NodeData firstNode = queue.poll(); 
    return firstNode; 
  } 
  /** 
   *  The first 4 Step:  
   *  Find the leaf node and obtain the Huffman code  
   * @param root   The root node  
   */ 
  private void achievehfmCode(NodeData root){ 
     
    if(null == root.getLeft() && null == root.getRight()){ 
      array_Str[root.data] = this.achieveLeafCode(root); 
      /** 
       * 
       *  Gets the length of the corpus after converting the file to Huffman coding  
       *  The length of the file  = 1 The length of the species code  *  Number of occurrences of the code  
       */ 
      WriteFile.size_File += (array_Str[root.data].length())*(root.weight); 
    } 
    if(null != root.getLeft()){ 
      achievehfmCode(root.getLeft()); 
    } 
    if(null != root.getRight()){ 
      achievehfmCode(root.getRight()); 
    } 
  } 
  /** 
   *  According to a certain leaf node, the Huffman code of the leaf node is obtained  
   * @param leafNode   Leaf node object  
   */ 
  private String achieveLeafCode(NodeData leafNode){ 
    String str = ""; 
    /***************** Storage nodes  01  Encoded queue ****************/ 
    List<Integer> list_hfmCode = new ArrayList<Integer>(); 
    list_hfmCode.add(leafNode.point); 
    NodeData parent = leafNode.getParent(); 
     
    while(null != parent){ 
      list_hfmCode.add(parent.point); 
      parent = parent.getParent(); 
    } 
     
    int size = list_hfmCode.size(); 
    for(int i=size-2; i>=0; i--){ 
      str += list_hfmCode.get(i); 
    } 
    System.out.println(leafNode.weight+" Huffman code for >>>"+str); 
    return str; 
  } 
  /** 
   *  The first 5 Step:  
   *  Based on the obtained Huffman table   clock  
   * Integer  As a byte byte [0~256) 
   * String  Code for Huffman  
   * @return  clock  
   */ 
  public Map<Integer,String> createMap(){ 
    int[] hfm_Codes = this.statistic("F:\\JAVA\\ Before compression .txt");// Get file byte information  
    PriorityQueue<NodeData> queue = this.createQueue(hfm_Codes);// Get priority queue  
    /* 
     *  Get the root node of the Huffman tree,  
     * this.createTree(queue) After the method call ( contains poll()) . queue.size()=0 
     */ 
    NodeData root = this.createTree(queue); 
    this.achievehfmCode(root);// Get the Huffman code and store it in an array  
     
    Map<Integer,String> map = new HashMap<Integer,String>(); 
    for(int i=0; i<256; i++){ 
      if(null != array_Str[i]){ 
        map.put(i, array_Str[i]); 
      } 
    } 
    return map; 
  } 
  /** 
   *  An array that stores byte Huffman encoding  
   *  Subscript: Byte data  
   *  Element: Huffman encoding  
   */ 
  public String[] array_Str = new String[256]; 
} 

3. Write the code table to the compressed file and compress the text


package compressFile; 
 
import java.io.BufferedInputStream; 
import java.io.BufferedOutputStream; 
import java.io.DataOutputStream; 
import java.io.FileInputStream; 
import java.io.FileOutputStream; 
import java.io.IOException; 
import java.util.Iterator; 
import java.util.Map; 
import java.util.Set; 
 
/** 
 *  Write the code table and file to a new file  
 * @author Andrew 
 * 
 */ 
public class WriteFile { 
 
  //  Instantiate the create code table class object  
  private StatisticBytes statistic = new StatisticBytes(); 
  private Map<Integer, String> map = statistic.createMap();//  Get the meter  
  //  Byte receive variable, receive Huffman code connection enough 8 Bytes converted at bit time  
  private int exmpCode; 
  public static int size_File; 
 
  public static void main(String[] args) { 
    WriteFile writeFile = new WriteFile(); 
    writeFile.init(); 
  } 
 
  private void init() { 
 
    String filePath = "F:\\JAVA\\ After the compression .txt"; 
    this.writeFile(filePath); 
  } 
 
  /** 
   *  The first 1 Step:   Writes a code table to a file  
   * 
   * @param dataOut 
   *       Basic data flow  
   */ 
  private void writeCodeTable(DataOutputStream dataOut) { 
    Set<Integer> set = map.keySet(); 
    Iterator<Integer> p = set.iterator(); 
 
    try { 
      // The length of a code table written to a file  
      dataOut.writeInt(map.size()); 
      while (p.hasNext()) { 
        Integer key = p.next(); 
        String hfmCode = map.get(key); 
 
        dataOut.writeInt(key);// Write a byte  
        // The length of a Huffman code  
        dataOut.writeByte(hfmCode.length()); 
        for(int i=0; i<hfmCode.length(); i++){ 
          dataOut.writeChar(hfmCode.charAt(i));// Write the Huffman code  
        } 
      } 
    } catch (IOException e) { 
      e.printStackTrace(); 
    } 
  } 
 
  /** 
   *  The first 2 Step:   Writes an encoding to a compressed file  
   * 
   * @param path 
   */ 
  private void writeFile(String path) { 
 
    try { 
      //  The input stream  
      FileInputStream in = new FileInputStream("F:\\JAVA\\ Before compression .txt"); 
      BufferedInputStream bIn = new BufferedInputStream(in); 
      //  The output stream  
      FileOutputStream out = new FileOutputStream(path); 
      DataOutputStream dataOut = new DataOutputStream(out); 
      BufferedOutputStream bOut = new BufferedOutputStream(out); 
      //  Writes a code table to a file  
      this.writeCodeTable(dataOut); 
      /******************** Write the number of zeros *********************/ 
      if(0 != size_File % 8){ 
        dataOut.writeByte(8 - (size_File % 8)); 
      } 
       
      String transString = "";// Intermediate string , Store strings up to size Is greater than 8 
      String waiteString = "";// Convert string , 
       
      int size_File = in.available(); 
      for(int i=0; i<size_File; i++){ 
        int j = bIn.read(); 
        System.out.println("]]]]]]]]]]]>>"); 
        waiteString = this.changeStringToByte(transString + statistic.array_Str[j]); 
        if(waiteString != ""){  
          bOut.write(exmpCode); 
          transString = waiteString; 
        }else{ 
          transString += statistic.array_Str[j]; 
        } 
      } 
      if("" != transString){ 
        int num = 8-transString.length()%8; 
        for(int i=0; i<num; i++){ 
          transString += 0; 
        } 
      } 
      transString = this.changeStringToByte(transString); 
      bOut.write(exmpCode); 
       
      bIn.close(); 
      bOut.flush(); 
      bOut.close(); 
      out.close(); 
    } catch (IOException e) { 
      e.printStackTrace(); 
    } 
  } 
 
  /** 
   *  Attached to the first 2 Step:  
   *  Converts the string to byte 
   * 
   * @param str 
   *       The string to be transformed  
   * @return  if str Is greater than 8 return 1 Before an interception 8 After a str 
   *      Otherwise returns "" 
   */ 
  private String changeStringToByte(String str) { 
    if (8 <= str.length()) { 
      exmpCode = ((str.charAt(0) - 48) * 128 
          + (str.charAt(1) - 48) * 64 + (str.charAt(2) - 48) * 32 
          + (str.charAt(3) - 48) * 16 + (str.charAt(4) - 48) * 8 
          + (str.charAt(5) - 48) * 4 + (str.charAt(6) - 48) * 2  
          + (str.charAt(7) - 48)); 
      str = str.substring(8); 
      return str; 
    } 
    return ""; 
  } 
} 

2. Huffman decompression

Principle: Reverse the compression, that is, how you compress how to decompress. Equivalent to you according to their own definition of the protocol for compression and decompression of the file.

The code is as follows:


package decompressionFile; 
 
import java.io.DataInputStream; 
import java.io.FileInputStream; 
import java.io.FileOutputStream; 
import java.io.IOException; 
import java.io.InputStream; 
import java.io.OutputStream; 
import java.util.ArrayList; 
import java.util.HashMap; 
import java.util.Iterator; 
import java.util.List; 
import java.util.Map; 
import java.util.Set; 
 
/** 
 *  Unzip the files   Read the data from the compressed file and extract it  
 * 
 * @author Andrew 
 * 
 */ 
public class ReadFile { 
  /** 
   *  clock  Integter:  byte  [0,255) String :   Huffman coding  
   */ 
  private Map<Integer, String> code_Map = new HashMap<Integer, String>(); 
 
  public static void main(String[] args) { 
    ReadFile readFile = new ReadFile(); 
    readFile.readFile(); 
  } 
 
  /** 
   *  The first 1 Step:   Read the code table from a file  
   * 
   * @param dataInput 
   *       Basic data input stream  
   * 
   */ 
  private void readMap(DataInputStream dataInput) { 
 
    try { 
      //  Read the length of the code meter  
      int size_Map = dataInput.readInt(); 
      /****************  Read code meter information  ************************************/ 
      for (int i = 0; i < size_Map; i++) { 
        int data = dataInput.readInt();//  Read in byte information  
        String str = "";//  Huffman coding  
        //  Huffman code length , Writes as a character when stored  
        byte codeSize = dataInput.readByte(); 
        for (byte j = 0; j < codeSize; j++) { 
          str += dataInput.readChar(); 
        } 
        System.out.println("&&&&&&&&&&>>>>"+data+"!!!!!!!>>"+str); 
        code_Map.put(data, str); 
      } 
      /***************************************************************/ 
    } catch (IOException e) { 
      e.printStackTrace(); 
    } 
  } 
 
  /** 
   *  The first 2 Step:   Unzip official documents  
   */ 
  private void readFile() { 
    try { 
      //  File input stream  
      InputStream input = new FileInputStream("F:\\JAVA\\ After the compression .txt"); 
      // BufferedInputStream bIn = new BufferedInputStream(input); 
      DataInputStream dInput = new DataInputStream(input); 
      //  Calls a method that reads the code table  
      this.readMap(dInput); 
      byte zerofill = dInput.readByte();//  Read the number of file filling zero  
      System.out.println(" The number of zeros to be filled -- >>>>"+zerofill); 
      //  File output stream  
      OutputStream out = new FileOutputStream("F:\\JAVA\\ After decompression .txt"); 
       
      String transString = "";// Receives a string used to match the Huffman encoding in the code table  
      String waiteString = "";// Receives the remaining string from the truncated Huffman encoding  
       
      /*********************************** A method that does not consume memory *****************************************/ 
      int readCode = input.read();// Read from a compressed file 1 A data  
      int size = input.available(); 
      for(int j=0; j<=size; j++){ 
        System.out.println("readCodereadCodereadCode "" >>>>"+readCode); 
        waiteString += this.changeIntToBinaryString(readCode);// Convert read integers to 01 string  
        for(int i=0; i<waiteString.length(); i++){ 
          // Will be read from the file 01 string 1 a 1 A 2-byte interception addition is compared to the Huffman encoding in the code table  
          transString += waiteString.charAt(i); 
          if(this.searchHC(transString, out)){ 
//           waiteString = waiteString.substring( i+1 ); 
            transString = ""; 
          } 
        } 
        waiteString = ""; 
        readCode = input.read(); 
        if(j == size-1){ 
          break; 
        } 
      } 
      /************************************ Deal with the last 1 A word ***************************************/ 
//     int lastByte = input.read(); 
      String lastWord = this.changeIntToBinaryString(readCode); 
      waiteString = transString + lastWord.substring(0, 8-zerofill); 
      transString = ""; 
      for(int i=0; i<waiteString.length(); i++){ 
        // Will be read from the file 01 string 1 a 1 A 2-byte interception addition is compared to the Huffman encoding in the code table  
        transString += waiteString.charAt(i); 
        if(this.searchHC(transString, out)){ 
//         waiteString = waiteString.substring( i+1 ); 
          transString = ""; 
        } 
      } 
//     this.searchHC(transString, out); 
      /*********************************** Queue method, memory consumption ****************************************/ 
//     int readCode = input.read();// Read from a compressed file 1 A data  
//     List<Character> list = new ArrayList<Character>(); 
//     while(-1 != readCode){ 
//       String str = this.changeIntToBinaryString(readCode); 
//       for(int i=0; i<str.length(); i++){ 
//         list.add(str.charAt(i)); 
//       } 
//       readCode = input.read(); 
//     } 
//     for(int j=0; j<list.size()-zerofill; j++){ 
//       transString += list.get(j); 
//       if(this.searchHC(transString, out)){ 
//         transString = ""; 
//       } 
//     } 
      /*****************************************************************************************/ 
      input.close(); 
      out.close(); 
    } catch (IOException e) { 
      e.printStackTrace(); 
    } 
  } 
  /** 
   *  Will read from the file 01  Strings are compared with Huffman codes in code tables  
   *  If the code table contains the corresponding Huffman code, the corresponding code table  
   *  Data is written to the unzipped file or not written  
   * @param str  I read it from a file 01  string  
   * @param out  File output stream  
   * @return    Return if write to file true Otherwise return false 
   * @throws IOException  An exception is thrown when a write error occurs  
   */ 
  private boolean searchHC(String str, OutputStream out) throws IOException{ 
    Set<Integer> set = code_Map.keySet(); 
    Iterator<Integer> p = set.iterator(); 
    while (p.hasNext()) { 
      Integer key = p.next();// Gets byte data from the code table  
      String hfmCode = code_Map.get(key);// Get the Huffman code  
      if(hfmCode.equals(str)){ 
        out.write(key); 
        return true; 
      } 
    } 
    return false; 
  } 
  /** 
   *  According to the  " In addition to 2 You take mod, you put it in reverse order " method  
   *  will 10 Convert to base number 2 Base string  
   * @param a   The number to convert  
   * @return  01  string  
   */ 
  private String changeIntToBinaryString(int a) { 
    int b = a; 
    int count = 0; // record  a  Can be converted into 01 The number of strings, in not enough 8 It's easy to fill in the blanks  
    String str = "";//  The reverse 2 Base string  
    String exmpStr = "";//  The order 2 Base string  
 
    while (a != 0) { 
      b = a/2; 
      if (a % 2 != 0) { 
        str += 1; 
      } else { 
        str += 0; 
      } 
      a = b; 
      count++; 
    } 
    // Not enough 8 Zero in place of a bit  
    for (int i = 0; i < 8 - count; i++) { 
      str += 0; 
    } 
    // It's going to be converted 2 Base string positive order  
    for (int j = 7; j >= 0; j--) { 
      System.out.print(str.charAt(j)); 
      exmpStr += str.charAt(j); 
    } 
    System.out.println(" The converted string >>>>>>>>>"+exmpStr); 
    return exmpStr; 
  } 
} 


Related articles: