Two methods of remodeling massive data by bitmap of bitmap in java
- 2021-06-28 12:49:24
- OfStack
Finding duplicate elements or removing duplicate elements from a large amount of data is a common text for interviews.Bitmaps can be used to solve such problems.For example, knowing that a file contains several phone numbers requires counting the number of different numbers, even sorting them within the time complexity of O (n).
Bitmaps require very little space (depending on the distribution of the data, but we can also process the data through a few layouts to make it dense) and are very efficient when data is dense.For example, the maximum 10-digit value that an 8-digit integer can represent is 999999. If each array corresponds to one bit bit, all 8-digit integers need to be stored: 99Mbit = 12.375MB.
In fact, java jdk 1.0 already provides the bitmap implementation BitSet class, but some of these methods are not available until jdk 1.4.
Next, I will implement the principle of bitmap by myself, and then implement bitmap by calling BitSet class of jdk directly, which is easy to understand:
package swordoffer;
// Remove duplication and sort
import java.util.Arrays;
import java.util.BitSet;
import java.util.Random;
/**
* @author Gavenyeah
* @date Time:
* @des:
*/
public class BitMap {
int ARRNUM = 800;
int LEN_INT = 32;
int mmax = 9999;
int mmin = 1000;
int N = mmax - mmin + 1;
public static void main(String args[]) {
new BitMap().findDuplicate();
new BitMap().findDup_jdk();
}
public void findDup_jdk() {
System.out.println("******* call JDK Library methods in -- start ********");
BitSet bitArray = new BitSet(N);
int[] array = getArray(ARRNUM);
for (int i = 0; i < ARRNUM; i++) {
bitArray.set(array[i] - mmin);
}
int count = 0;
for (int j = 0; j < bitArray.length(); j++) {
if (bitArray.get(j)) {
System.out.print(j + mmin + " ");
count++;
}
}
System.out.println();
System.out.println(" The sorted array size is: " + count );
System.out.println("******* call JDK Library methods in -- End ********");
}
public void findDuplicate() {
int[] array = getArray(ARRNUM);
int[] bitArray = setBit(array);
printBitArray(bitArray);
}
public void printBitArray(int[] bitArray) {
int count = 0;
for (int i = 0; i < N; i++) {
if (getBit(bitArray, i) != 0) {
count++;
System.out.print(i + mmin + "\t");
}
}
System.out.println();
System.out.println(" The size of the de-reordered array is: " + count);
}
public int getBit(int[] bitArray, int k) {// 1 Move Right k % 32 position And above The array subscript is k/32 Value of location
return bitArray[k / LEN_INT] & (1 << (k % LEN_INT));
}
public int[] setBit(int[] array) {// Get array position subscript first i/32, Then? Or above
// At this location int Type Numeric bit Bit: i % 32
int m = array.length;
int bit_arr_len = N / LEN_INT + 1;
int[] bitArray = new int[bit_arr_len];
for (int i = 0; i < m; i++) {
int num = array[i] - mmin;
bitArray[num / LEN_INT] |= (1 << (num % LEN_INT));
}
return bitArray;
}
public int[] getArray(int ARRNUM) {
@SuppressWarnings("unused")
int array1[] = { 1000, 1002, 1032, 1033, 6543, 9999, 1033, 1000 };
int array[] = new int[ARRNUM];
System.out.println(" Array size: " + ARRNUM);
Random r = new Random();
for (int i = 0; i < ARRNUM; i++) {
array[i] = r.nextInt(N) + mmin;
}
System.out.println(Arrays.toString(array));
return array;
}
}
summary