Java example code for determining whether characters are Chinese (super useful)

  • 2020-04-01 04:39:05
  • OfStack

In project work we often need to determine whether characters are Chinese, so we have collected code snippets for detecting Chinese characters and share them here for your reference.

Post the code directly, with detailed comments.


package com.coder4j.main;
import java.util.regex.Pattern;

/**
 * Demonstrates three ways of testing whether a string contains, or consists
 * entirely of, Chinese characters:
 * <ol>
 *   <li>{@code *ByRange} - compares each UTF-16 code unit against the CJK
 *       Unified Ideographs block (U+4E00 to U+9FFF);</li>
 *   <li>{@code *ByReg} - the same range expressed as a regular expression;</li>
 *   <li>{@code hasChinese} / {@code isChinese} - classifies each code point by
 *       its {@link Character.UnicodeBlock}, which additionally accepts CJK
 *       extension blocks and CJK / full-width / general punctuation.</li>
 * </ol>
 *
 * <p>Every method returns {@code false} for {@code null} and for the empty
 * string, so the three variants agree on degenerate input.
 *
 * <p>All non-ASCII sample text is written with Unicode escapes so the class
 * compiles the same way regardless of the source-file encoding.
 */
public class CheckChinese {

    /** First code point of the CJK Unified Ideographs block. */
    private static final int CJK_FIRST = 0x4E00;

    /** Last code point of the CJK Unified Ideographs block. */
    private static final int CJK_LAST = 0x9FFF;

    /** One or more characters from the CJK Unified Ideographs block; compiled once and reused. */
    private static final Pattern CJK_RUN = Pattern.compile("[\u4E00-\u9FFF]+");

    private static final String HAS_LABEL = " Does it contain Chinese: ";
    private static final String ALL_LABEL = " Is it all in Chinese? ";
    private static final String DIVIDER = "------- The divider -------";

    /**
     * Runs every check against nine sample strings and prints the results.
     *
     * <p>Expected output per sample (contains / all), identical for the range
     * and regex variants unless noted:
     * <pre>
     *   s1 ASCII text and punctuation          false / false
     *   s2 Chinese, no punctuation             true  / true
     *   s3 Chinese with Chinese punctuation    true  / false (true for isChinese,
     *                                                  which accepts punctuation)
     *   s4 Korean                              false / false
     *   s5 Japanese hiragana                   false / false
     *   s6 replacement characters              false / false
     *   s7, s8 box-drawing symbols             false / false
     *   s9 Traditional Chinese                 true  / true
     * </pre>
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String[] samples = {
            // s1: pure English
            "Hello,Tom.!@#$%^&*()_+-={}|[];':\"?",
            // s2: pure Chinese (without Chinese punctuation)
            "\u4F60\u597D\u4E2D\u56FD",
            // s3: pure Chinese (including Chinese punctuation)
            "\u4F60\u597D\uFF0C\u4E2D\u56FD\u3002\u201C\u201D\uFF1A\uFF1B\uFF08\uFF09\u3010\u3011\uFF01\uFFE5\u3001",
            // s4: Korean
            "\uD55C\uAD6D\uC5B4\uB09C",
            // s5: Japanese (hiragana)
            " \u304E \u3058 \u3085 \u3064 ",
            // s6..s8: special characters
            "\uFFFD\uFFFD",
            " \u2543 ",
            " \u2542 ",
            // s9: Traditional Chinese
            "\u7E41\u9AD4\u4E2D\u6587",
        };

        // 1. Character range comparison.
        for (int i = 0; i < samples.length; i++) {
            printResult(i, HAS_LABEL, hasChineseByRange(samples[i]));
        }
        System.out.println(DIVIDER);
        for (int i = 0; i < samples.length; i++) {
            printResult(i, ALL_LABEL, isChineseByRange(samples[i]));
        }
        System.out.println(DIVIDER);

        // 2. The same character range as a regular expression.
        for (int i = 0; i < samples.length; i++) {
            printResult(i, HAS_LABEL, hasChineseByReg(samples[i]));
        }
        System.out.println(DIVIDER);
        for (int i = 0; i < samples.length; i++) {
            printResult(i, ALL_LABEL, isChineseByReg(samples[i]));
        }
        System.out.println(DIVIDER);

        // 3. Unicode block classification (CJK character set).
        for (int i = 0; i < samples.length; i++) {
            printResult(i, HAS_LABEL, hasChinese(samples[i]));
        }
        System.out.println(DIVIDER);
        for (int i = 0; i < samples.length; i++) {
            printResult(i, ALL_LABEL, isChinese(samples[i]));
        }
    }

    /** Prints one result line, numbering samples from s1. */
    private static void printResult(int index, String label, boolean result) {
        System.out.println("s" + (index + 1) + label + result);
    }

    /**
     * Tests whether any code point of {@code str} lies in one of the accepted
     * CJK ideograph or punctuation blocks.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if at least one code point is classified as Chinese;
     *         {@code false} otherwise, including for {@code null} or empty input
     */
    public static boolean hasChinese(String str) {
        if (str == null) {
            return false;
        }
        // Walk by code point, not by char: ideographs outside the BMP are
        // stored as surrogate pairs and cannot be classified one char at a time.
        for (int i = 0; i < str.length(); ) {
            int codePoint = str.codePointAt(i);
            if (isChineseCodePoint(codePoint)) {
                return true;
            }
            i += Character.charCount(codePoint);
        }
        return false;
    }

    /**
     * Tests whether every code point of {@code str} lies in one of the accepted
     * CJK ideograph or punctuation blocks.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if {@code str} is non-empty and consists solely of
     *         code points classified as Chinese; {@code false} otherwise
     */
    public static boolean isChinese(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        for (int i = 0; i < str.length(); ) {
            int codePoint = str.codePointAt(i);
            if (!isChineseCodePoint(codePoint)) {
                return false;
            }
            i += Character.charCount(codePoint);
        }
        return true;
    }

    /**
     * Classifies a single code point by its Unicode block.
     *
     * <p>Besides the ideograph blocks, general punctuation and the
     * half-width/full-width forms are accepted so that Chinese punctuation
     * counts as Chinese text.
     */
    private static boolean isChineseCodePoint(int codePoint) {
        // of(int) may return null for code points outside any known block;
        // the reference comparisons below then simply all fail.
        Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);
        return block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D
                || block == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || block == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Tests, with a regular expression, whether {@code str} contains at least
     * one character from the CJK Unified Ideographs block.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if a CJK ideograph is found; {@code false} otherwise
     */
    public static boolean hasChineseByReg(String str) {
        if (str == null) {
            return false;
        }
        return CJK_RUN.matcher(str).find();
    }

    /**
     * Tests, with a regular expression, whether {@code str} consists solely of
     * characters from the CJK Unified Ideographs block. Chinese punctuation is
     * outside that block and therefore makes the result {@code false}.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if {@code str} is non-empty and fully matches;
     *         {@code false} otherwise
     */
    public static boolean isChineseByReg(String str) {
        if (str == null) {
            return false;
        }
        return CJK_RUN.matcher(str).matches();
    }

    /**
     * Tests, by numeric comparison, whether {@code str} contains at least one
     * character from the CJK Unified Ideographs block.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if a CJK ideograph is found; {@code false} otherwise
     */
    public static boolean hasChineseByRange(String str) {
        if (str == null) {
            return false;
        }
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c >= CJK_FIRST && c <= CJK_LAST) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tests, by numeric comparison, whether {@code str} consists solely of
     * characters from the CJK Unified Ideographs block. Chinese punctuation is
     * outside that block and therefore makes the result {@code false}.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if {@code str} is non-empty and every character is
     *         in range; {@code false} otherwise
     */
    public static boolean isChineseByRange(String str) {
        if (str == null || str.isEmpty()) {
            return false;
        }
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c < CJK_FIRST || c > CJK_LAST) {
                return false;
            }
        }
        return true;
    }
}

If you only need to determine whether text is Chinese and do not need to handle Chinese punctuation, matching with a regular expression is recommended and may be more efficient.

That concludes this introduction to Java example code for determining whether characters are Chinese (super useful); I hope it helps you.


Related articles: