StringBuilder performance testing in Java

  • 2020-04-01 03:29:39
  • OfStack

As you look at the KMP algorithm, you want to make simple statistics about execution time and performance.

The conclusion is that Java String indexOf method has the best performance, followed by KMP algorithm, followed by the traditional BF algorithm, of course, the comparison is a bit far-fetched, the SUN algorithm also USES Java to achieve, with a look not like KMP, it needs to be studied in detail.

The test code is as follows:


package com.test.test.kmp;

import java.util.Random;

public class KMPTest {

	public static void main(String[] args) {
		test();
	}
	
	//
	public static void test() {
		//
		int allLen = 8000000;
		int subLen = 11;
		int charLen = 4;
		String cl = buildString(charLen);
		int times = 50;
		//
		testMatch(allLen, subLen, cl, times);
	}
	
	public static void testMatch(int allLen, int subLen, String cl, int times){
		int allBF = 0;
		int allString = 0;
		int allKMP = 0;
		int allGC = 0;
		int allbuild = 0;
		int alltoArray = 0;
		
		long start = System.currentTimeMillis();
		
		System.out.println("--------------------------");
		for (int i = 0; i < times; i++) {
			//1. Construct string loss
			long buildStart = System.currentTimeMillis();
			String sub = buildString(subLen, cl);
			String all = buildString(allLen, sub) +sub;
			long buildEnd = System.currentTimeMillis();
			allbuild += (buildEnd - buildStart);
			//2. Loss of toCharArray
			long toArrayStart = System.currentTimeMillis();
			char[] allArray = all.toCharArray();
			char[] subArray = sub.toCharArray();
			long toArrayEnd = System.currentTimeMillis();
			alltoArray += (toArrayEnd - toArrayStart);
			//
			long startBF = System.currentTimeMillis();
			int indexBF = bfIndexOf(all, sub);
			long endBF = System.currentTimeMillis();
			//
			long timeoutBF = endBF - startBF;
			allBF += timeoutBF;
			System.gc();
			allGC += (System.currentTimeMillis() - endBF);
	
			//
			long startString = System.currentTimeMillis();
			int indexString = stringIndexOf(all, sub);
			long endString = System.currentTimeMillis();
			//
			long timeoutString = endString - startString;
			allString += timeoutString;
			System.gc();
			allGC += (System.currentTimeMillis() - endString);
			

			//
			long startKMP = System.currentTimeMillis();
			//int indexKMP = kmpIndexOf(all, sub);
			int indexKMP = KMP_Index(allArray, subArray);
			long endKMP = System.currentTimeMillis();
			//
			long timeoutKMP = endKMP - startKMP;
			allKMP += timeoutKMP;
			System.gc();
			allGC += (System.currentTimeMillis() - endKMP);
			
			//
			//System.out.println("all="+all.substring(0,100)+" ......");
			//System.out.println("sub="+sub);
			//
			//System. IndexBF out.println (" indexBF = "+ +"; Time: "+ timeoutBF +" ms ");
			//System. IndexString out.println (" indexString = "+ +"; Time: "+ timeoutString +" ms ");
			if(indexBF == indexString && indexKMP == indexString){
				//System. Out.println ("!!!!!!! The contrast is equal." );
			} else {
				throw new RuntimeException(" Compare the error .");
			}
			
			//
			if(i % 20 == 10){
				System.out.println(i+" time bfIndexOf= "+allBF+"ms");
				System.out.println(i+" time stringIndexOf= "+allString+"ms");
				System.out.println(i+" time KMPIndexOf= "+allKMP+"ms");
				System.out.println(i+" time allbuild= "+allbuild+"ms");
				System.out.println(i+" time alltoArray= "+alltoArray+"ms");
				System.out.println(i+"*3 time ,GC= "+allGC+"ms");
				long end = System.currentTimeMillis();
				long allTime = end-start;
				long lossTime = allTime - (allBF+allString+allKMP+allGC);
				System.out.println(i+" time , Total time : "+(allTime)+"ms;  loss :" + lossTime +"ms;  Loss than : " +((0.0+lossTime)/allTime * 100)+"%");
				System.out.println("--------------------------");
			}
			
		}
		//
		System.out.println(times+" time bfIndexOf; In summary : "+allBF+"ms");
		System.out.println(times+" time stringIndexOf; In summary : "+allString+"ms");
		System.out.println(times+" time KMPIndexOf; In summary : "+allKMP+"ms");
		System.out.println(times+" time allbuild= "+allbuild+"ms");
		System.out.println(times+" time alltoArray= "+alltoArray+"ms");
		System.out.println(times+"*3 time ,GC; In summary : "+allGC+"ms");
		long end = System.currentTimeMillis();
		long allTime = end-start;
		long lossTime = allTime - (allBF+allString+allKMP+allGC);
		System.out.println(times+" time , Total time : "+(allTime)+"ms;  loss :" + lossTime +"ms;  Loss than : " +((0.0+lossTime)/allTime * 100)+"%");
		System.out.println("--------------------------");
		
	}
	
	//
	
	
	//Build character

	public static String buildString(int len){
		return buildString(len, null);
	}
	public static String buildString(int len, String sub){
		//
		final int TYPE_NUMBER = 0;
		final int TYPE_LOWERCASE = 1;
		final int TYPE_UPPERCASE = 2;
		//
		long seed = System.nanoTime();
		Random random = new Random(seed);
		//
		//
		StringBuilder builder = new StringBuilder();
		for (int i = 0; i < len; i++) {
			//
			int type = random.nextInt(3);// 0-2
			int cvalue = 0;
			if(TYPE_NUMBER == type){
				cvalue = '0' + random.nextInt(10);// 0~(n-1)
			} else if(TYPE_LOWERCASE == type){
				cvalue = 'a' + random.nextInt(26);// 0~(n-1)
			} else if(TYPE_UPPERCASE == type){
				cvalue = 'A' + random.nextInt(26);// 0~(n-1)
			} else {
				throw new RuntimeException(Random.class.getName()+"#newxtInt(int);Wrong!");
			}
			//
			//cvalue = 'A' + random.nextInt(26);// 0~(n-1)
			//
			char c = (char)cvalue;
			if(null != sub && sub.length() > 1){
				int index = random.nextInt(sub.length());
				c = sub.charAt(index);
			}
			//String s = String.valueOf(c);
			builder.append(c);
			//
		}
		//
		return builder.toString();
	}
	
	


	
	public static int stringIndexOf(String all, String sub){
		//Defensive programming
		if(null == all || null== sub){
			return -1;
		}
		//Call the String method in Java
		return all.indexOf(sub);
	}
	
	
	
	public static int bfIndexOf(String all, String sub){
		//Defensive programming
		if(null == all || null== sub){
			return -1;
		}
		//
		int lenAll = all.length();
		int lenSub = sub.length();
		int i = 0;
		while (i < lenAll) {
			//Let's start with the I subscript
			int j = 0;
			while (j < lenSub) {
				//
				char all_i = all.charAt(i);
				char sub_j = sub.charAt(j);
				if(all_i == sub_j){
					//If they are equal, they continue to compare the next character
					i++;
					j++; //This growth is important
				} else {
					//Otherwise, jump out of the inner loop
					break;
				}
			}
			//If j grows to be equal to lenSub, then the match is successful
			if(lenSub == j){
				return i - lenSub;
			} else {
				i = 1+i - j; //Go back I
			}
		}
		//
		return -1;
	}
	

	
	public static int kmpIndexOf(String all, String sub){
		//
		char[] allArray = all.toCharArray(); 
		char[] subArray = sub.toCharArray(); 
		//
		return KMP_Index(allArray, subArray);
	}
   
  public static int[] next(char[] t) { 
    int[] next = new int[t.length]; 
    next[0] = -1; 
    int i = 0; 
    int j = -1; 
    while (i < t.length - 1) { 
      if (j == -1 || t[i] == t[j]) { 
        i++; 
        j++; 
        if (t[i] != t[j]) { 
          next[i] = j; 
        } else { 
          next[i] = next[j]; 
        } 
      } else { 
        j = next[j]; 
      } 
    } 
    return next; 
  } 
 
   
	public static int KMP_Index(char[] allArray, char[] subArray) { 
    int[] next = next(subArray); 
    int i = 0; 
    int j = 0; 
    while (i <= allArray.length - 1 && j <= subArray.length - 1) { 
      if (j == -1 || allArray[i] == subArray[j]) { 
        i++; 
        j++; 
      } else { 
        j = next[j]; 
      } 
    } 
    if (j < subArray.length) { 
      return -1; 
    } else 
      return i - subArray.length; //Returns the header and subscript of the pattern string in the main string
  } 
}

One result of the test is as follows:


--------------------------
10 time bfIndexOf= 94ms
10 time stringIndexOf= 56ms
10 time KMPIndexOf= 76ms
10 time allbuild= 5870ms
10 time alltoArray= 137ms
10*3 time ,GC= 155ms
10 time , Total time : 6388ms;  loss :6007ms;  Loss than : 94.03569192235442%
--------------------------
30 time bfIndexOf= 365ms
30 time stringIndexOf= 222ms
30 time KMPIndexOf= 295ms
30 time allbuild= 16452ms
30 time alltoArray= 395ms
30*3 time ,GC= 452ms
30 time , Total time : 18184ms;  loss :16850ms;  Loss than : 92.66388033435987%
--------------------------
50 time bfIndexOf; In summary : 550ms
50 time stringIndexOf; In summary : 335ms
50 time KMPIndexOf; In summary : 450ms
50 time allbuild= 26501ms
50 time alltoArray= 637ms
50*3 time ,GC; In summary : 734ms
50 time , Total time : 29211ms;  loss :27142ms;  Loss than : 92.91705179555647%
--------------------------

As you can see, using StringBuilder to construct a string, 8 million *50 appends takes about 26 seconds. That's 16 million per second...
It seems that we need to write a function for timing. Junit has its own statistics, but samples are not easy to do.

In this case, the preparation of the data, i.e. the selection of test cases, is critical and may need to be written into a text file.


Related articles: