utf8_substr: a PHP function that cuts substrings according to the UTF-8 encoding rules, so multi-byte characters are never split
- 2020-05-26 08:00:48
- OfStack
/**
 * Byte-offset substr() for UTF-8 text that never splits a multi-byte character.
 *
 * $start and $length are BYTE offsets and follow the same rules as PHP's
 * substr(): a negative $start counts from the end of the string, a negative
 * $length stops that many bytes before the end, and a null $length means
 * "up to the end of the string".
 *
 * If an edge of the requested byte range falls inside a multi-byte character,
 * the range is widened so that the whole character is included. The returned
 * string can therefore be a few bytes longer than $length, but it never
 * starts or ends with a partial character.
 *
 * An empty requested range (zero length, $start beyond the end of the string,
 * or a negative $length that ends before $start) yields ''.
 *
 * @param string   $str    UTF-8 encoded input.
 * @param int      $start  Byte offset of the first byte; negative counts from the end.
 * @param int|null $length Byte length; negative stops that many bytes before the
 *                         end; null (default) runs to the end of the string.
 * @return string The slice, widened to whole characters.
 */
function utf8_substr( $str , $start , $length=null ){
    $str = (string) $str;
    $strlen = strlen( $str );
    $start = (int) $start;

    // Resolve $start to an absolute offset clamped to [0, $strlen], as substr() does.
    $begin = $start < 0 ? max( $strlen + $start , 0 ) : min( $start , $strlen );

    // Resolve $length to an absolute, exclusive end offset.
    if ( $length === null ){
        $end = $strlen;
    }
    elseif ( $length < 0 ){
        // A negative length is measured from the end of the string, not from $start.
        $end = $strlen + (int) $length;
    }
    else{
        $end = min( $begin + (int) $length , $strlen );
    }

    // Nothing requested: bail out before touching any string offset.
    if ( $end <= $begin ){
        return '';
    }

    // Leading edge: if the slice starts on a continuation byte (10xxxxxx),
    // look back over at most 6 bytes for the lead byte of that character.
    if ( ( ord( $str[$begin] ) & 0xC0 ) === 0x80 ){
        $floor = max( $begin - 6 , 0 );
        $probe = $begin - 1;
        while ( $probe > $floor && ( ord( $str[$probe] ) & 0xC0 ) === 0x80 ){
            $probe--;
        }
        if ( $probe >= 0 ){
            $lead = ord( $str[$probe] );
            // Only widen when a real lead byte is found; malformed input is left as is.
            if ( $lead >= 0xC0 && $lead <= 0xFD ){
                $begin = $probe;
            }
        }
    }

    // Trailing edge: absorb the continuation bytes (at most 5) that complete
    // the last character of the slice.
    $limit = min( $end + 5 , $strlen );
    while ( $end < $limit && ( ord( $str[$end] ) & 0xC0 ) === 0x80 ){
        $end++;
    }

    return substr( $str , $begin , $end - $begin );
}
Test data:
<?php
// Demo driver: dump the result of utf8_substr() for several ( start , length )
// byte ranges of one sample string, each followed by an HTML line break.
$str = 'dfjdjf measuring 13f try 65&2 Data FDJ ( 1 it mfe& ... it ';
$cases = array(
    array( 22 , 12 ),
    array( 22 , -6 ),
    array( 9 , 12 ),
    array( 19 , 12 ),
    array( 28 , -6 ),
);
foreach ( $cases as $case ){
    var_dump( utf8_substr( $str , $case[0] , $case[1] ) );
    echo ' <br /> ';
}
Output (the substrings contain no garbled characters; you are welcome to test the function and report bugs):
string(12) "according to FDJ"
string(26) "according to FDJ (1 on mfe & ..."
string (13) "13 f try 65 & 2 the number"
string(12) "data fd"
string(20) "DJ (1 on mfe & ..."