PHP string truncation that keeps multi-byte (UTF-8) characters intact: code sharing
- 2021-07-09 07:27:58
- OfStack
Here is the code:
/**
 * Byte-oriented substring that never splits a multi-byte UTF-8 character.
 *
 * The window is chosen with the same offset rules as substr(): offsets are
 * counted in bytes, a negative $start counts from the end of the string, a
 * negative $length stops that many bytes before the end, and a null $length
 * runs to the end. If either edge of the window falls inside a multi-byte
 * sequence, that edge is widened outwards so the whole character is kept;
 * the result can therefore be a few bytes longer than requested.
 *
 * When the result is shorter (in bytes) than the input, '...' is appended.
 *
 * @param string   $str    Input string, expected to be UTF-8 encoded.
 * @param int      $start  Byte offset of the first byte; negative counts from the end.
 * @param int|null $length Byte count; negative stops before the end; null means "to the end".
 *
 * @return string The extracted text, with '...' appended if it is shorter than $str.
 */
public static function usubstr($str, $start, $length = null)
{
    $str = (string) $str;
    $strlen = strlen($str);
    if ($strlen === 0) {
        return '';
    }

    // Resolve $start/$length to absolute byte offsets [$begin, $end), clamped
    // the way substr() clamps them. Doing this explicitly avoids passing a
    // null length to substr() (treated as 0 before PHP 8) and keeps the
    // look-ahead/look-behind positions correct for negative arguments.
    $start = (int) $start;
    $begin = $start < 0 ? max(0, $strlen + $start) : min($start, $strlen);

    if ($length === null) {
        $end = $strlen;
    } elseif ((int) $length < 0) {
        $end = max($begin, $strlen + (int) $length);
    } else {
        $end = min($strlen, $begin + (int) $length);
    }

    // Tail: continuation bytes (10xxxxxx) directly after the window belong to
    // the last character inside it, so pull them in. Up to 5 are accepted to
    // cover the legacy 6-byte UTF-8 form. No "u" modifier: raw bytes are matched.
    if ($end < $strlen && preg_match('/^[\x80-\xBF]{1,5}/', substr($str, $end, 5), $tail)) {
        $end += strlen($tail[0]);
    }

    // Head: if the window starts on a continuation byte, look back (at most
    // 6 bytes) for the lead byte of that character and start from there.
    if ($begin > 0 && $begin < $end) {
        $first = ord($str[$begin]);
        if ($first >= 0x80 && $first <= 0xBF) {
            $lookBehind = min(6, $begin);
            $prev = substr($str, $begin - $lookBehind, $lookBehind);
            // \z (not $) so a trailing newline byte cannot satisfy the anchor.
            if (preg_match('/[\xC0-\xFD][\x80-\xBF]{0,5}\z/', $prev, $head)) {
                $begin -= strlen($head[0]);
            }
        }
    }

    $res = (string) substr($str, $begin, $end - $begin);

    // Mark truncated output.
    if (strlen($res) < $strlen) {
        $res .= '...';
    }

    return $res;
}