Two methods of remodeling massive data by bitmap of bitmap in java

  • 2021-06-28 12:49:24
  • OfStack

Finding duplicate elements or removing duplicate elements from a large amount of data is a common text for interviews.Bitmaps can be used to solve such problems.For example, knowing that a file contains several phone numbers requires counting the number of different numbers, even sorting them within the time complexity of O (n).

Bitmaps require very little space (depending on the distribution of the data, but we can also process the data through a few layouts to make it dense) and are very efficient when data is dense.For example, the maximum 10-digit value that an 8-digit integer can represent is 999999. If each array corresponds to one bit bit, all 8-digit integers need to be stored: 99Mbit = 12.375MB.

In fact, java jdk 1.0 already provides the bitmap implementation BitSet class, but some of these methods are not available until jdk 1.4.

Next, I will implement the principle of bitmap by myself, and then implement bitmap by calling BitSet class of jdk directly, which is easy to understand:


package swordoffer;
// Remove duplication and sort 
import java.util.Arrays;
import java.util.BitSet;
import java.util.Random;
/**
 * @author Gavenyeah
 * @date Time:
 * @des:
 */
public class BitMap {
  int ARRNUM = 800;
  int LEN_INT = 32;
  int mmax = 9999;
  int mmin = 1000;
  int N = mmax - mmin + 1;
  public static void main(String args[]) {
     new BitMap().findDuplicate();
    new BitMap().findDup_jdk();
  }
  public void findDup_jdk() {
    System.out.println("******* call JDK Library methods in -- start ********");
    BitSet bitArray = new BitSet(N);
    int[] array = getArray(ARRNUM);
    for (int i = 0; i < ARRNUM; i++) {
      bitArray.set(array[i] - mmin);
    }
    int count = 0;
    for (int j = 0; j < bitArray.length(); j++) {
      if (bitArray.get(j)) {
        System.out.print(j + mmin + " ");
        count++;
      }
    }
    System.out.println();
    System.out.println(" The sorted array size is: " + count );
    System.out.println("******* call JDK Library methods in -- End ********");
  }
  public void findDuplicate() {
    int[] array = getArray(ARRNUM);
    int[] bitArray = setBit(array);
    printBitArray(bitArray);
  }
  public void printBitArray(int[] bitArray) {
    int count = 0;
    for (int i = 0; i < N; i++) {
      if (getBit(bitArray, i) != 0) {
        count++;
        System.out.print(i + mmin + "\t");
      }
    }
    System.out.println();
    System.out.println(" The size of the de-reordered array is: " + count);
  }
  public int getBit(int[] bitArray, int k) {// 1 Move Right  k % 32 position   And above   The array subscript is  k/32  Value of location 
    return bitArray[k / LEN_INT] & (1 << (k % LEN_INT));
  }
  public int[] setBit(int[] array) {//  Get array position subscript first  i/32,  Then?   Or above 
                    //  At this location int Type Numeric bit Bit: i % 32
    int m = array.length;
    int bit_arr_len = N / LEN_INT + 1;
    int[] bitArray = new int[bit_arr_len];
    for (int i = 0; i < m; i++) {
      int num = array[i] - mmin;
      bitArray[num / LEN_INT] |= (1 << (num % LEN_INT));
    }
    return bitArray;
  }
  public int[] getArray(int ARRNUM) {
    @SuppressWarnings("unused")
    int array1[] = { 1000, 1002, 1032, 1033, 6543, 9999, 1033, 1000 };
    int array[] = new int[ARRNUM];
    System.out.println(" Array size: " + ARRNUM);
    Random r = new Random();
    for (int i = 0; i < ARRNUM; i++) {
      array[i] = r.nextInt(N) + mmin;
    }
    System.out.println(Arrays.toString(array));
    return array;
  }
}

summary


Related articles: