Python example code: determining a character's language type from its Unicode code point

2020-07-21 08:38:17
OfStack

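The example in this article shows how to use Python to determine the language type of a character (Chinese, digit, English letter, or other) from its Unicode code point, and how to convert between half-width and full-width characters, as follows.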

Example code:


def is_chinese(uchar):
    """Return True if ``uchar`` is a Chinese character.

    A character counts as Chinese when it lies in the inclusive range
    U+4E00..U+9FA5. The check is an ordinary string comparison, so the
    function is meant to be called with a single-character unicode string.
    """
    return u'\u4e00' <= uchar <= u'\u9fa5'
 
def is_number(uchar):
    """Return True if ``uchar`` is an ASCII digit (U+0030..U+0039, '0'-'9').

    The check is an ordinary string comparison, so the function is meant to
    be called with a single-character unicode string.
    """
    return u'\u0030' <= uchar <= u'\u0039'
 
def is_alphabet(uchar):
    """Return True if ``uchar`` is an ASCII English letter.

    Accepts upper-case U+0041..U+005A ('A'-'Z') and lower-case
    U+0061..U+007A ('a'-'z'). The check is an ordinary string comparison,
    so the function is meant to be called with a single-character unicode
    string.
    """
    is_upper = u'\u0041' <= uchar <= u'\u005a'
    is_lower = u'\u0061' <= uchar <= u'\u007a'
    return is_upper or is_lower
 
def is_other(uchar):
    """Return True if ``uchar`` is not Chinese, not a digit and not a letter.

    This is the complement of ``is_chinese``, ``is_number`` and
    ``is_alphabet`` taken together.
    """
    return not (is_chinese(uchar) or is_number(uchar) or is_alphabet(uchar))
 
def B2Q(uchar):
    """Convert a half-width character to its full-width form.

    Characters outside the half-width range U+0020..U+007E are returned
    unchanged. The space (U+0020) maps to the ideographic space U+3000;
    every other half-width character maps to its code point plus 0xFEE0.

    ``uchar`` must be a single character, since it is passed to ``ord``.
    """
    # ``unichr`` exists only on Python 2; on Python 3 ``chr`` does the same.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    inside_code = ord(uchar)
    # Not a half-width character: return the original character.
    if inside_code < 0x0020 or inside_code > 0x7e:
        return uchar
    if inside_code == 0x0020:
        # The space is the one exception to the fixed-offset rule.
        inside_code = 0x3000
    else:
        # For everything else: full-width = half-width + 0xfee0.
        inside_code += 0xfee0
    return to_char(inside_code)
 
def Q2B(uchar):
    """Convert a full-width character to its half-width form.

    The ideographic space U+3000 maps to the ordinary space U+0020; every
    other character has 0xFEE0 subtracted from its code point. If the
    result does not land in the half-width range U+0020..U+007E, the input
    was not a full-width character and is returned unchanged.

    ``uchar`` must be a single character, since it is passed to ``ord``.
    """
    # ``unichr`` exists only on Python 2; on Python 3 ``chr`` does the same.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    inside_code = ord(uchar)
    if inside_code == 0x3000:
        inside_code = 0x0020
    else:
        inside_code -= 0xfee0
    # Result is not a half-width character: return the original character.
    if inside_code < 0x0020 or inside_code > 0x7e:
        return uchar
    return to_char(inside_code)
 
def stringQ2B(ustring):
    """Convert every full-width character in ``ustring`` to half-width.

    Applies ``Q2B`` to each character and joins the results into a new
    string.
    """
    return "".join(Q2B(uchar) for uchar in ustring)
 
def uniform(ustring):
    """Normalise a string: full-width to half-width, then lower-case.

    Runs ``stringQ2B`` on ``ustring`` and lower-cases the result.
    """
    return stringQ2B(ustring).lower()
 
def string2List(ustring):
    """Split ``ustring`` into runs of Chinese characters, letters and digits.

    Any character for which ``is_other`` returns True acts as a separator
    and is dropped. Consecutive separators do not produce empty strings.

    Returns a list of the non-empty runs, in order of appearance.
    """
    segments = []
    current = []
    for uchar in ustring:
        if is_other(uchar):
            # A separator closes the current run, if one is open.
            if current:
                segments.append("".join(current))
                current = []
        else:
            current.append(uchar)
    # Flush the trailing run when the string does not end on a separator.
    if current:
        segments.append("".join(current))
    return segments

Conclusion

That's the end of this article on Python example code for determining language type based on Unicode. Interested readers can continue to browse other related topics on this site. If you find any shortcomings, you are welcome to point them out in the comments. Thank you for your support!