utf8_substr: a PHP function that extracts substrings according to the UTF-8 encoding rules, so that multi-byte characters are never cut in half

  • 2020-05-26 08:00:48
  • OfStack

 
/*
*  function :  Works like substr(), but never returns a string that begins or
*              ends in the middle of a multi-byte UTF-8 character. If a cut
*              point falls inside a character, the result is widened so that
*              the whole character is included.
*  parameter :
*      $str     the UTF-8 encoded string to cut
*      $start   byte offset of the first byte; a negative value counts from
*               the end of the string (same meaning as in substr())
*      $length  number of bytes to take; a negative value means "stop that
*               many bytes before the end"; null means "up to the end"
*  return :    the extracted string; it may be a few bytes longer than
*              requested because partial characters at either edge are
*              completed. An empty string is returned when the requested
*              window is empty or lies outside the string.
*/
function utf8_substr( $str , $start , $length=null ){
    $str    = (string) $str;
    $strlen = strlen( $str );
    $start  = (int) $start;

    // Turn a negative start into an absolute byte offset, clamped to 0.
    if ( $start < 0 ){
        $start = max( 0 , $strlen + $start );
    }
    // Nothing to cut (empty string or start past the end).
    if ( $start >= $strlen ){
        return '';
    }

    // Work out the absolute end offset (exclusive) for every kind of length.
    if ( $length === null ){
        $end = $strlen;
    }
    elseif ( $length < 0 ){
        $end = $strlen + (int) $length;
    }
    else{
        $end = min( $strlen , $start + (int) $length );
    }
    // Empty window: return early so that $res[0] is never read on ''.
    if ( $end <= $start ){
        return '';
    }

    // Plain byte-wise cut first.
    $res = substr( $str , $start , $end - $start );

    // If the cut ended inside a character, the bytes right after the cut are
    // continuation bytes (0x80-0xBF): append them (at most 5) to finish it.
    if ( $end < $strlen && preg_match( '@^[\x80-\xBF]{1,5}@' , substr( $str , $end , 5 ) , $bytes ) ){
        $res .= $bytes[0];
    }

    // If the cut started inside a character, the first byte of the result is
    // a continuation byte: look back up to 6 bytes for the lead byte
    // (0xC0-0xFD) and prepend it together with any continuation bytes that
    // sit between it and the cut point.
    $ord0 = ord( $res[0] );
    if ( $ord0 >= 0x80 && $ord0 <= 0xBF ){
        $prev_start = max( 0 , $start - 6 );
        $prev_segm  = substr( $str , $prev_start , $start - $prev_start );
        if ( preg_match( '@[\xC0-\xFD][\x80-\xBF]{0,5}\z@' , $prev_segm , $bytes ) ){
            $res = $bytes[0] . $res;
        }
    }

    return $res;
}

Test data:
 
<?php
// Sample string mixing single-byte and multi-byte text.
$str = 'dfjdjf measuring 13f try 65&2 Data FDJ ( 1 it mfe& ... it ';

// Each entry is a ( start , length ) pair passed to utf8_substr().
$cases = array(
    array( 22 , 12 ),
    array( 22 , -6 ),
    array( 9 , 12 ),
    array( 19 , 12 ),
    array( 28 , -6 ),
);

// Dump every extracted substring, followed by an HTML line break.
foreach ( $cases as $case ){
    var_dump( utf8_substr( $str , $case[0] , $case[1] ) );
    echo ' <br /> ';
}

Output (the extracted substrings contain no garbled characters; you are welcome to test the function and report bugs):
string(12) "according to FDJ"
string(26) "according to FDJ (1 on mfe & ..."
string (13) "13 f try 65 & 2 the number"
string(12) "data fd"
string(20) "DJ (1 on mfe & ..."

Related articles: