Summary of Common Methods for Truncating Chinese Strings in PHP

  • 2021-12-05 05:57:08
  • OfStack

This article describes, with examples, several common ways to truncate strings that contain Chinese characters in PHP. They are shared here for your reference:


<?php
/*
A UTF-8 encoded character consists of 1 to 4 bytes; most Chinese characters take 3 bytes.
*/
/*-------------------------- Method 1: truncating a Chinese string (assumes a double-byte encoding such as GB2312) ------------------------------*/
/**
 * Cut a byte range out of a string in a double-byte encoding (lead byte > 0xA0)
 * without splitting a two-byte character.
 *
 * The string is always scanned from byte 0 so that lead/trail byte alignment is
 * known, but only characters whose first byte lies in [$start, $start + $len)
 * are returned. A two-byte character that begins inside the window is kept
 * whole even if its second byte falls just outside it; a character that
 * straddles $start is dropped.
 *
 * @param string $str   Source string.
 * @param int    $start Byte offset at which the result begins.
 * @param int    $len   Window size in bytes.
 * @return string The selected characters.
 */
function msubstr($str, $start, $len)
{
  $tmpstr = "";
  $end = $start + $len;
  $total = strlen($str);
  for ($i = 0; $i < $end && $i < $total; $i++) {
    // A byte above 0xA0 starts a two-byte character.
    $width = ord(substr($str, $i, 1)) > 0xa0 ? 2 : 1;
    if ($i >= $start) {
      $tmpstr .= substr($str, $i, $width);
    }
    // Skip the trail byte so it is never mistaken for a new character.
    $i += $width - 1;
  }
  return $tmpstr;
}
/*---------------------------- Method 2 -----------------------------------*/
// Truncates a UTF-8 string
/**
 * Truncate a UTF-8 string from its beginning to a display-width budget.
 *
 * An ASCII byte costs 1 unit of $len and any multi-byte character costs 2.
 * A multi-byte character is only emitted when both of its units fit; if it
 * does not fit, truncation stops there.
 *
 * @param string $str UTF-8 source string.
 * @param int    $len Width budget (ASCII = 1, multi-byte character = 2).
 * @return string The leading part of $str that fits into $len.
 */
function utf_substr($str, $len)
{
  $new_str = [];
  for ($i = 0; $i < $len && strlen($str) > 0; $i++) {
    $lead = ord(substr($str, 0, 1));
    if ($lead > 127) {
      // Derive the sequence length from the lead byte instead of assuming 3.
      if ($lead >= 0xF0) {
        $bytes = 4;
      } elseif ($lead >= 0xE0) {
        $bytes = 3;
      } elseif ($lead >= 0xC0) {
        $bytes = 2;
      } else {
        $bytes = 1; // stray continuation byte: pass it through on its own
      }
      $i++; // a multi-byte character uses a second unit of the budget
      if ($i < $len) {
        $new_str[] = substr($str, 0, $bytes);
        $str = substr($str, $bytes);
      }
    } else {
      $new_str[] = substr($str, 0, 1);
      $str = substr($str, 1);
    }
  }
  return implode('', $new_str);
}
/*------------------------------------- Method 3 (UTF-8) --------------------------------*/
/**
 * Shorten a UTF-8 string to roughly $length display columns and append "...".
 *
 * The string is split into whole UTF-8 characters; an ASCII character counts
 * as 1 column and any multi-byte character as 2. If the whole string fits into
 * $length columns it is returned without an ellipsis. Otherwise characters are
 * copied until the running width exceeds $length - 3, and "..." is appended.
 * Bytes that do not form a valid sequence (and NUL bytes) are dropped.
 *
 * @param string $string UTF-8 source string.
 * @param int    $length Column budget (ASCII = 1, multi-byte = 2).
 * @return string The original characters, or a shortened copy ending in "...".
 */
function cutstr($string, $length)
{
  // One alternative per UTF-8 sequence length (1, 2, 3 and 4 bytes).
  preg_match_all("/[\x01-\x7f]|[\xc2-\xdf][\x80-\xbf]|\xe0[\xa0-\xbf][\x80-\xbf]|[\xe1-\xef][\x80-\xbf][\x80-\xbf]|\xf0[\x90-\xbf][\x80-\xbf][\x80-\xbf]|[\xf1-\xf7][\x80-\xbf][\x80-\xbf][\x80-\xbf]/", $string, $info);
  $chars = isset($info[0]) ? $info[0] : [];

  // Nothing to cut when the whole string already fits the budget.
  $total = 0;
  foreach ($chars as $char) {
    $total += ord($char) > 127 ? 2 : 1;
  }
  if ($total <= $length) {
    return join('', $chars);
  }

  $wordscut = "";
  $j = 0;
  foreach ($chars as $char) {
    $wordscut .= $char;
    $j += ord($char) > 127 ? 2 : 1;
    if ($j > $length - 3) {
      return $wordscut . "...";
    }
  }
  return $wordscut;
}
// Demo: shorten a sample sentence to a budget of 10 columns and print the result.
$string = "312 Haha, this combination is difficult to cut ";
print cutstr($string, 10);
/*--------------------------------- The commented-out snippet below was once used to pull a number or, failing that, the third character out of a string ------------------------------*/
// $name1 = mysql_result($my_rst,0,"name");
// $name = preg_match("/([1-9][0-9]+)/",$name1,$r);
// $name = $r[0];
// if($name == ""){
// $name=preg_replace('#^(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,2}'.
// '((?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+){0,1}).*#s',
// '$1',$name1);
// }
/*-------------------------------------------- Method 4 (UTF-8) ---------------------------------------------*/
/**
 * Truncate a UTF-8 string to a visual width and append "..." if anything was cut.
 *
 * Width is measured in "full-width character" units: a multi-byte character
 * or an ASCII capital letter counts as 1, and every other single byte
 * (lowercase letters, digits, half-width punctuation) counts as 0.5.
 *
 * @param string    $sourcestr UTF-8 source string.
 * @param int|float $cutlength Maximum width in full-width units.
 * @return string The leading part of $sourcestr, followed by "..." only when
 *                part of the input was actually left out.
 */
function cut_str($sourcestr, $cutlength)
{
  $returnstr = '';
  $i = 0; // byte offset of the next character
  $n = 0; // accumulated visual width
  $str_length = strlen($sourcestr); // length in bytes
  while ($n < $cutlength && $i < $str_length) {
    $ascnum = ord(substr($sourcestr, $i, 1)); // lead byte of the next character
    if ($ascnum >= 240) {
      $bytes = 4; // 4-byte sequence (e.g. emoji)
      $n++;
    } elseif ($ascnum >= 224) {
      $bytes = 3; // 3-byte sequence (most CJK characters)
      $n++;
    } elseif ($ascnum >= 192) {
      $bytes = 2; // 2-byte sequence
      $n++;
    } elseif ($ascnum >= 65 && $ascnum <= 90) {
      $bytes = 1; // capital letters are wide enough to count as a full unit
      $n++;
    } else {
      $bytes = 1; // other single bytes are about half as wide
      $n = $n + 0.5;
    }
    $returnstr .= substr($sourcestr, $i, $bytes);
    $i += $bytes;
  }
  // Only flag truncation when bytes remain unread; comparing the byte length
  // with $cutlength would add "..." to short multi-byte strings.
  if ($i < $str_length) {
    $returnstr .= "...";
  }
  return $returnstr;
}
/*-------------------- Method 5 (UTF-8) ---------------------------------------------*/
/**
 * Cut a UTF-8 string that may contain HTML entities without splitting a
 * multi-byte character or an entity.
 *
 * Counting rules for one "unit" of the input:
 *  - &lt; &gt; &amp; &quot; and a bare "&" are one narrow unit;
 *  - a numeric entity (&#123;) and any multi-byte character are one wide unit;
 *  - an ASCII capital letter is counted as wide, other ASCII bytes as narrow;
 *  - a stray continuation byte is kept but not counted.
 *
 * With $magic enabled, $len is a width in wide units (a narrow unit is half a
 * wide one); a wide character or numeric entity that would overshoot the
 * width by half a unit is dropped. With $magic disabled, $len is simply the
 * number of units to keep.
 *
 * @param string     $title Source string (UTF-8, optionally HTML-escaped).
 * @param int        $start Byte offset to start from; if it points into the
 *                          middle of a multi-byte character it is moved back
 *                          to that character's lead byte.
 * @param int|string $len   Length to keep; "" means the byte length of $title.
 * @param bool       $magic Whether to weigh narrow/wide units (see above).
 * @return string The selected part of $title.
 */
function FSubstr($title, $start, $len = "", $magic = true)
{
  $total = strlen($title);
  if ($len == "") $len = $total;

  // Move a start offset that lands on a continuation byte back to the lead byte.
  if ($start != 0) {
    $startv = ord(substr($title, $start, 1));
    if ($startv >= 128 && $startv < 192) {
      for ($i = $start - 1; $i > 0; $i--) {
        if (ord(substr($title, $i, 1)) >= 192) break;
      }
      $start = $i;
    }
  }
  if ($total <= $len) return substr($title, $start, $len);

  $named = ["&lt;", "&gt;", "&amp;", "&quot;"];
  $alen = 0;    // narrow units seen (magic mode)
  $blen = 0;    // wide units seen (magic mode)
  $realnum = 0; // units seen (non-magic mode)
  $length = 0;  // bytes to return
  for ($i = $start; $i < $total; $i++) {
    $cur = substr($title, $i, 1);
    $code = ord($cur);
    $cstep = 0; // bytes occupied by the unit starting at $i
    $ctype = 0; // 1 = wide unit that is dropped again if it overshoots
    $cells = 0; // 1 = narrow, 2 = wide, 0 = not counted

    if ($cur == "&") {
      // A bare ampersand is an ordinary narrow character.
      $cstep = 1;
      $cells = 1;
      $isNamed = false;
      foreach ($named as $entity) {
        if (substr($title, $i, strlen($entity)) === $entity) {
          $cstep = strlen($entity);
          $isNamed = true;
          break;
        }
      }
      // \G pins the match to offset $i so a later entity cannot be mistaken
      // for one that starts here.
      if (!$isNamed && preg_match('/\G&#\d+;?/', $title, $match, 0, $i)) {
        $cstep = strlen($match[0]);
        $cells = 2;
        $ctype = 1;
      }
    } elseif ($code >= 192) {
      // Lead byte: the sequence length follows from its value.
      if ($code >= 252) {
        $cstep = 6;
      } elseif ($code >= 248) {
        $cstep = 5;
      } elseif ($code >= 240) {
        $cstep = 4;
      } elseif ($code >= 224) {
        $cstep = 3;
      } else {
        $cstep = 2;
      }
      $cells = 2;
      $ctype = 1;
    } elseif ($code >= 128) {
      // Stray continuation byte: keep it, but do not count it as a unit.
      $length += 1;
    } else {
      $cstep = 1;
      // Capital letters are counted as wide but are never dropped on overshoot.
      $cells = ($code >= 65 && $code <= 90) ? 2 : 1;
    }

    if ($cells > 0) {
      $length += $cstep;
      $i += $cstep - 1; // the loop's own $i++ steps over the first byte
      $realnum++;
      if ($magic) {
        if ($cells == 2) {
          $blen++;
        } else {
          $alen++;
        }
      }
    }

    if ($magic) {
      $used = $blen * 2 + $alen; // width in half units
      if ($used == $len * 2) break;
      if ($used == $len * 2 + 1) {
        // Overshot by half a unit: take the last wide unit back out.
        if ($ctype == 1) {
          $length -= $cstep;
        }
        break;
      }
    } elseif ($realnum == $len) {
      break;
    }
  }
  return substr($title, $start, $length);
}
/**
 * Character-based substring for UTF-8 text, implemented with a single regex.
 *
 * The pattern skips up to $from characters from the start, captures up to
 * $len characters, discards the rest, and returns the captured part.
 *
 * @param string $str  UTF-8 source string.
 * @param int    $from Number of leading characters to skip.
 * @param int    $len  Maximum number of characters to return.
 * @return string|null The captured characters (null if the regex fails).
 */
function utf8Substr($str, $from, $len)
{
  // One character: an ASCII byte, or a lead byte followed by continuation bytes.
  $oneChar = '(?:[\x00-\x7F]|[\xC0-\xFF][\x80-\xBF]+)';
  $skipped = $oneChar . '{0,' . $from . '}';
  $wanted = '(' . $oneChar . '{0,' . $len . '})';
  $pattern = '#^' . $skipped . $wanted . '.*#s';
  return preg_replace($pattern, '$1', $str);
}
// Demo: keep the first 15 characters of a sample title and print them.
$title = utf8Substr(" You Harper niad1 Nasda wop asdni You love nobody, Aston Satan 12ccs- sd", 0, 15);
print $title;
?>

Readers interested in more PHP-related content can check this site's special topics: "Summary of PHP Encoding and Transcoding Operation Skills", "Encyclopedia of PHP Array (Array) Operation Skills", "Summary of PHP String (string) Usage", "Summary of PHP Common Functions and Skills" and "Summary of PHP Error and Exception Handling Methods".

I hope this article is helpful to everyone's PHP programming.


Related articles: