Analyze the efficiency of strtr function

  • 2020-06-23 00:00:20
  • OfStack

String matching and substitution operations have been common recently. Previously, str_replace or preg_replace were commonly used in 1. strtr is said to be efficient, so compare 1:

$i = 0;
$t = microtime(true);
for(;$i<1000;$i++)
{
    $str = strtr(md5($i), $p2);
}
var_dump(microtime(true)-$t);    //0.085476875305176
$t = microtime(true);
for(;$i<2000;$i++)
{
    $str = preg_replace($p, '', md5($i));
}
var_dump(microtime(true)-$t);   //0.09863805770874

The results show that the efficiency of strtr is about 15% higher than preg_replace.
During the weekend, I checked the php source code of strtr:

PHP_FUNCTION(strtr)
{
        zval **str, **from, **to;
        int ac = ZEND_NUM_ARGS();
        // Parameter checking ( zend_get_parameters_ex The function is defined in zend_api.c File) 
        if (ac < 2 || ac > 3 || zend_get_parameters_ex(ac, &str, &from, &to) == FAILURE) {
                WRONG_PARAM_COUNT;
        }
        // The parameter checking 
        if (ac == 2 && Z_TYPE_PP(from) != IS_ARRAY) {
                php_error_docref(NULL TSRMLS_CC, E_WARNING, "The second argument is not an array.");
                RETURN_FALSE;
        }
        convert_to_string_ex(str);
        /* shortcut for empty string */
        // The macro Z_STRLEN_PP Defined in the zend_operators.h
        if (Z_STRLEN_PP(str) == 0) {
                RETURN_EMPTY_STRING();
        }
        if (ac == 2) {
                php_strtr_array(return_value, Z_STRVAL_PP(str), Z_STRLEN_PP(str), HASH_OF(*from));
        } else {
                convert_to_string_ex(from);
                convert_to_string_ex(to);
                ZVAL_STRINGL(return_value, Z_STRVAL_PP(str), Z_STRLEN_PP(str), 1);
                php_strtr(Z_STRVAL_P(return_value),
                                  Z_STRLEN_P(return_value),
                                  Z_STRVAL_PP(from),
                                  Z_STRVAL_PP(to),
                                  MIN(Z_STRLEN_PP(from),
                                  Z_STRLEN_PP(to)));
        }
}

First look at the php_strtr function:

//trlen Is the string str_from with str_to The minimum of the length of 
PHPAPI char *php_strtr(char *str, int len, char *str_from, char *str_to, int trlen)
{
        int i;
        unsigned char xlat[256]; //
        if ((trlen < 1) || (len < 1)) {
                return str;
        }
        //xlat The index of phi is the same as the value 
        for (i = 0; i < 256; xlat[i] = i, i++);
        // the from to to Each of the strings 1 The characters correspond to each other. Such as: from="ab",to="cd" , will produce such a correspondence 'a'=>'c', 'b'=>'d' . 
        for (i = 0; i < trlen; i++) {
                xlat[(unsigned char) str_from[i]] = str_to[i];
        }
        // replace ( But I think there's room for improvement in the efficiency of this function, because if the number of characters that need to be replaced is just a small part of the whole string, then most of the assignments don't really make sense, in which case it feels efficient to judge first and assign later, right 1 Points. Free test 1 Under the )
        for (i = 0; i < len; i++) {
                str[i] = xlat[(unsigned char) str[i]];
        }
        return str;
}

As you can see, you should be efficient when dealing with operations like strtr('abcdaaabcd', 'ab', 'efd').
(Note: This operation outputs efcdeeefcd)
Now look at php_strtr_array:

static void php_strtr_array(zval *return_value, char *str, int slen, HashTable *hash)
{
        zval **entry;
        char  *string_key;
        uint   string_key_len;
        zval **trans;
        zval   ctmp;
        ulong num_key;
        int minlen = 128*1024;
        int maxlen = 0, pos, len, found;
        char *key;
        HashPosition hpos;
        smart_str result = {0};
        HashTable tmp_hash;
        // Replace the array from hash Copied to the tmp_hash, And record the maximum and minimum length of the subscript string 
        zend_hash_init(&tmp_hash, 0, NULL, NULL, 0);
        zend_hash_internal_pointer_reset_ex(hash, &hpos);
        while (zend_hash_get_current_data_ex(hash, (void **)&entry, &hpos) == SUCCESS) {
                switch (zend_hash_get_current_key_ex(hash, &string_key, &string_key_len, &num_key, 0, &hpos)) {
                        case HASH_KEY_IS_STRING:
                                len = string_key_len-1;
                                if (len < 1) {
                                        zend_hash_destroy(&tmp_hash);
                                        RETURN_FALSE;
                                }
                                zend_hash_add(&tmp_hash, string_key, string_key_len, entry, sizeof(zval*), NULL);
                                if (len > maxlen) {
                                        maxlen = len;
                                }
                                if (len < minlen) {
                                        minlen = len;
                                }
                                break;
                        // Subscripts are converted to string types if they are plastic, for example: array(10=>'aa') Converted to array('10'=>'aa')
                        case HASH_KEY_IS_LONG:
                                Z_TYPE(ctmp) = IS_LONG;
                                Z_LVAL(ctmp) = num_key;
                                convert_to_string(&ctmp);
                                len = Z_STRLEN(ctmp);
                                zend_hash_add(&tmp_hash, Z_STRVAL(ctmp), len+1, entry, sizeof(zval*), NULL);
                                zval_dtor(&ctmp);
                                if (len > maxlen) {
                                        maxlen = len;
                                }
                                if (len < minlen) {
                                        minlen = len;
                                }
                                break;
                }
                zend_hash_move_forward_ex(hash, &hpos);
        }
        key = emalloc(maxlen+1);
        pos = 0; 
        // From the beginning of the string 1 Five characters start a loop match, pos Record the location of the current lookup 
        while (pos < slen) {
                // The current position plus the maximum length, if larger than the string length, the maximum length needs to be changed 
                if ((pos + maxlen) > slen) {
                        maxlen = slen - pos;
                }
                found = 0;
                memcpy(key, str+pos, maxlen);
                // Match from the maximum length, that is, yes 'abcd' If, array('a'=>'e','ab'=>'f') , will be the first ab Replace with f Instead of putting it first a Switch to e . 
                for (len = maxlen; len >= minlen; len--) {
                        key[len] = 0;
                        // Because the use of hash Table, so the efficiency is quite high 
                        if (zend_hash_find(&tmp_hash, key, len+1, (void**)&trans) == SUCCESS) {
                                char *tval;
                                int tlen;
                                zval tmp;
                                if (Z_TYPE_PP(trans) != IS_STRING) {
                                        tmp = **trans;
                                        zval_copy_ctor(&tmp);
                                        convert_to_string(&tmp);
                                        tval = Z_STRVAL(tmp);
                                        tlen = Z_STRLEN(tmp);
                                } else {
                                        tval = Z_STRVAL_PP(trans);
                                        tlen = Z_STRLEN_PP(trans);
                                }
                                // To join the 
                                smart_str_appendl(&result, tval, tlen);
                                // Leap forward 
                                pos += len;
                                found = 1;
                                if (Z_TYPE_PP(trans) != IS_STRING) {
                                        zval_dtor(&tmp);
                                }
                                break;
                        }
                }
                if (! found) {
                        smart_str_appendc(&result, str[pos++]);
                }
        }
        efree(key);
        zend_hash_destroy(&tmp_hash);
        smart_str_0(&result);
        RETVAL_STRINGL(result.c, result.len, 0);
}


Related articles: