string.c: Add fastpath to single_byte_optimizable
`rb_enc_from_index` is a costly operation, so it is worth avoiding calling it for the common encodings. Also, in the case of UTF-8, it's more efficient to scan the coderange if it is unknown than to fall back to the slower algorithms.
This commit is contained in:
parent
5a570421a5
commit
a332367dad
Notes:
git
2024-08-09 20:07:03 +00:00
26
string.c
26
string.c
@ -594,22 +594,30 @@ fstring_cmp(VALUE a, VALUE b)
|
|||||||
memcmp(aptr, bptr, alen) != 0);
|
memcmp(aptr, bptr, alen) != 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int
|
static inline bool
|
||||||
single_byte_optimizable(VALUE str)
|
single_byte_optimizable(VALUE str)
|
||||||
{
|
{
|
||||||
rb_encoding *enc;
|
int encindex = ENCODING_GET(str);
|
||||||
|
switch (encindex) {
|
||||||
|
case ENCINDEX_ASCII_8BIT:
|
||||||
|
case ENCINDEX_US_ASCII:
|
||||||
|
return true;
|
||||||
|
case ENCINDEX_UTF_8:
|
||||||
|
// For UTF-8 it's worth scanning the string coderange when unknown.
|
||||||
|
return rb_enc_str_coderange(str) == ENC_CODERANGE_7BIT;
|
||||||
|
}
|
||||||
/* Conservative. It may be ENC_CODERANGE_UNKNOWN. */
|
/* Conservative. It may be ENC_CODERANGE_UNKNOWN. */
|
||||||
if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
|
if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT) {
|
||||||
return 1;
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
enc = STR_ENC_GET(str);
|
if (rb_enc_mbmaxlen(rb_enc_from_index(encindex)) == 1) {
|
||||||
if (rb_enc_mbmaxlen(enc) == 1)
|
return true;
|
||||||
return 1;
|
}
|
||||||
|
|
||||||
/* Conservative. Possibly single byte.
|
/* Conservative. Possibly single byte.
|
||||||
* "\xa1" in Shift_JIS for example. */
|
* "\xa1" in Shift_JIS for example. */
|
||||||
return 0;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
VALUE rb_fs;
|
VALUE rb_fs;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user