Java implementation of hexadecimal character unicode and English translation examples

  • 2020-04-01 02:56:10
  • OfStack

The relationship between Unicode and UTF can be remembered simply: Unicode is a standards organization and, above all, an encoding specification, and Java itself represents strings as UTF-16. UTF (Unicode Transformation Format) is the conversion format for Unicode code points, turning them into bytes so that text can be transmitted over a network and stored on a storage medium. There are therefore several UTF formats, such as UTF-8, UTF-16 and UTF-32, and the following 10 Unicode encoding formats differ in byte order (little-endian "le" versus big-endian "be") and in whether a byte order mark (BOM) is used.


public static void main(String[] args) throws UnsupportedEncodingException {
                 StringUtil.str2all("0  Product model description ");
  StringUtil.str4all("30000900A74EC1548B57F753CF63F08F");
 }

 public static void str4all(String uStr) throws UnsupportedEncodingException{
  System.out.println("+++++++++++++++++++++++++++++++++++++++++++++++++++");

  byte[] bs = new byte[uStr.length()/2];
  for (int i = 0; i < bs.length; i++) {
   bs[i] = (byte) Integer.parseInt(uStr.substring(i*2, i*2+2), 16);
  }

  System.out.println(new String(bs, "utf-8"));
  // 16
  System.out.println(new String(bs, "utf-16")); //With the unicode
  System.out.println(new String(bs, "utf-16le"));
  System.out.println(new String(bs, "x-utf-16le-bom"));
  System.out.println(new String(bs, "utf-16be"));
//  System.out.println(new String(bs, "x-utf-16be-bom")); // UnsupportedEncodingException
  // 32
  System.out.println(new String(bs, "utf-32"));
  System.out.println(new String(bs, "utf-32le"));
  System.out.println(new String(bs, "x-utf-32le-bom"));
  System.out.println(new String(bs, "utf-32be"));
  System.out.println(new String(bs, "x-utf-32le-bom"));
 }

 public static void str2all(String uStr) throws UnsupportedEncodingException{

  System.out.println("+++++++++++++++++++++++++++++++++++++++++++++++++++");

  byte[] bs = new byte[]{};

  bs = uStr.getBytes("utf-8");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
  // 16
  bs = uStr.getBytes("utf-16");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
  bs = uStr.getBytes("utf-16le");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
  bs = uStr.getBytes("x-utf-16le-bom");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
  bs = uStr.getBytes("utf-16be");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
//  bs = uStr.getBytes("x-utf-16be-bom"); // UnsupportedEncodingException
  // 32
  bs = uStr.getBytes("utf-32");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
  bs = uStr.getBytes("utf-32le");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
  bs = uStr.getBytes("x-utf-32le-bom");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
  bs = uStr.getBytes("utf-32be");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
  bs = uStr.getBytes("x-utf-32le-bom");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
 }

Collection of charset names and their aliases


charset US-ASCII %S
    historicalName ASCII
    # IANA aliases
    alias iso-ir-6
    alias ANSI_X3.4-1986
    alias ISO_646.irv:1991
    alias ASCII
    alias ISO646-US
    alias us
    alias IBM367
    alias cp367
    alias csASCII
    alias default
    # Other aliases
    alias 646 # Solaris POSIX locale
    alias iso_646.irv:1983
    alias ANSI_X3.4-1968 # Linux POSIX locale (RedHat)
    alias ascii7
charset UTF-8 UTF_8
    historicalName UTF8
    alias UTF8
    alias unicode-1-1-utf-8
charset UTF-16 UTF_16
    historicalName UTF-16
    alias UTF_16
    alias utf16
    alias unicode
    alias UnicodeBig
charset UTF-16BE UTF_16BE
    historicalName UnicodeBigUnmarked
    alias UTF_16BE
    alias ISO-10646-UCS-2
    alias X-UTF-16BE
    alias UnicodeBigUnmarked
charset UTF-16LE UTF_16LE
    historicalName UnicodeLittleUnmarked
    alias UTF_16LE
    alias X-UTF-16LE
    alias UnicodeLittleUnmarked
charset x-UTF-16LE-BOM UTF_16LE_BOM
    historicalName UnicodeLittle
    alias UnicodeLittle
charset UTF-32 UTF_32
    alias UTF_32
    alias UTF32
charset UTF-32LE UTF_32LE
    alias UTF_32LE
    alias X-UTF-32LE
charset UTF-32BE UTF_32BE
    alias UTF_32BE
    alias X-UTF-32BE
charset X-UTF-32LE-BOM UTF_32LE_BOM
    alias UTF_32LE_BOM
    alias UTF-32LE-BOM
charset X-UTF-32BE-BOM UTF_32BE_BOM
    alias UTF_32BE_BOM
    alias UTF-32BE-BOM
charset ISO-8859-1 %S
    historicalName ISO8859_1
    # IANA aliases
    alias iso-ir-100
    alias ISO_8859-1
    alias latin1
    alias l1
    alias IBM819
    alias cp819
    alias csISOLatin1
    # Other aliases
    alias 819
    alias IBM-819
    alias ISO8859_1
    alias ISO_8859-1:1987
    alias ISO_8859_1
    alias 8859_1
    alias ISO8859-1
charset ISO-8859-2 %S
    historicalName ISO8859_2
    alias iso8859_2
    alias 8859_2
    alias iso-ir-101
    alias ISO_8859-2
    alias ISO_8859-2:1987
    alias ISO8859-2
    alias latin2
    alias l2
    alias ibm912
    alias ibm-912
    alias cp912
    alias 912
    alias csISOLatin2
charset ISO-8859-4 %S
    historicalName ISO8859_4
    alias iso8859_4
    alias iso8859-4
    alias 8859_4
    alias iso-ir-110
    alias ISO_8859-4
    alias ISO_8859-4:1988
    alias latin4
    alias l4
    alias ibm914
    alias ibm-914
    alias cp914
    alias 914
    alias csISOLatin4
charset ISO-8859-5 %S
    historicalName ISO8859_5
    alias iso8859_5
    alias 8859_5
    alias iso-ir-144
    alias ISO_8859-5
    alias ISO_8859-5:1988
    alias ISO8859-5
    alias cyrillic
    alias ibm915
    alias ibm-915
    alias cp915
    alias 915
    alias csISOLatinCyrillic
charset ISO-8859-7 %S
    historicalName ISO8859_7
    alias iso8859_7
    alias 8859_7
    alias iso-ir-126
    alias ISO_8859-7
    alias ISO_8859-7:1987
    alias ELOT_928
    alias ECMA-118
    alias greek
    alias greek8
    alias csISOLatinGreek
    alias sun_eu_greek # Solaris 7/8 compatibility
    alias ibm813
    alias ibm-813
    alias 813
    alias cp813
    alias iso8859-7 # Solaris 9 compatibility
charset ISO-8859-9 %S
    historicalName ISO8859_9
    alias iso8859_9
    alias 8859_9
    alias iso-ir-148
    alias ISO_8859-9
    alias ISO_8859-9:1989
    alias ISO8859-9
    alias latin5
    alias l5
    alias ibm920
    alias ibm-920
    alias 920
    alias cp920
    alias csISOLatin5
charset ISO-8859-13 %S
    historicalName ISO8859_13
    alias iso8859_13
    alias 8859_13
    alias iso_8859-13
    alias ISO8859-13
charset ISO-8859-15 %S
    historicalName ISO8859_15
    # IANA alias
    alias ISO_8859-15
    # Other aliases
    alias 8859_15
    alias ISO8859_15
    alias ISO8859-15
    alias IBM923
    alias IBM-923
    alias cp923
    alias 923
    alias LATIN0
    alias LATIN9
    alias L9
    alias csISOlatin0
    alias csISOlatin9
    alias ISO8859_15_FDIS
charset KOI8-R %S
    historicalName KOI8_R
    alias koi8_r
    alias koi8
    alias cskoi8r
charset KOI8-U %S
    alias koi8_u
charset windows-1250 %S
    historicalName Cp1250
    alias cp1250
    alias cp5346 # Euro IBM CCSID
charset windows-1251 %S
    historicalName Cp1251
    alias cp1251
    alias cp5347 # Euro IBM CCSID
    alias ansi-1251 # Solaris compatibility
charset windows-1252 %S
    historicalName Cp1252
    alias cp1252
    alias cp5348 # Euro IBM CCSID
charset windows-1253 %S
    historicalName Cp1253
    alias cp1253
    alias cp5349 # Euro IBM CCSID
charset windows-1254 %S
    historicalName Cp1254
    alias cp1254
    alias cp5350 # Euro IBM CCSID
charset windows-1257 %S
    historicalName Cp1257
    alias cp1257
    alias cp5353 # Euro IBM CCSID

charset IBM437 %S
    historicalName Cp437
    alias cp437
    alias ibm-437
    alias 437
    alias cspc8codepage437
    alias windows-437
charset x-IBM737 %S
    historicalName Cp737
    alias cp737
    alias ibm737
    alias ibm-737
    alias 737
charset IBM775 %S
    historicalName Cp775
    alias cp775
    alias ibm-775
    alias 775
charset IBM850 %S
    historicalName Cp850
    alias cp850
    alias ibm-850
    alias 850
    alias cspc850multilingual
charset IBM852 %S
    historicalName Cp852
    alias cp852
    alias ibm-852
    alias 852
    alias csPCp852
charset IBM855 %S
    historicalName Cp855
    alias cp855
    alias ibm-855
    alias 855
    alias cspcp855
charset IBM857 %S
    historicalName Cp857
    alias cp857
    alias ibm-857
    alias 857
    alias csIBM857
charset IBM00858 %S
    historicalName Cp858
    alias cp858
    alias ccsid00858
    alias cp00858
    alias 858
charset IBM862 %S
    historicalName Cp862
    alias cp862
    alias ibm-862
    alias 862
    alias csIBM862
    alias cspc862latinhebrew
charset IBM866 %S
    historicalName Cp866
    alias cp866
    alias ibm-866
    alias 866
    alias csIBM866
charset x-IBM874 %S
    historicalName Cp874
    alias cp874
    alias ibm874
    alias ibm-874
    alias 874


Related articles: