rb_enc_str_asciionly_p: avoid always fetching the encoding
Profiling of `JSON.dump` shows a significant amount of time spent in `rb_enc_str_asciionly_p`, in large part because it always fetches the encoding. It can be made twice as fast in this scenario by first checking the coderange and only falling back to fetching the encoding if the coderange is unknown. Additionally, we can skip fetching the encoding for the most common encodings.
This commit is contained in:
parent
245ed2fc89
commit
b7fa2dd0d0
Notes:
git
2024-09-03 10:21:55 +00:00
32
string.c
32
string.c
@ -137,10 +137,10 @@ VALUE rb_cSymbol;
|
|||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
static inline bool
|
static inline bool
|
||||||
str_enc_fastpath(VALUE str)
|
str_encindex_fastpath(int encindex)
|
||||||
{
|
{
|
||||||
// The overwhelming majority of strings are in one of these 3 encodings.
|
// The overwhelming majority of strings are in one of these 3 encodings.
|
||||||
switch (ENCODING_GET_INLINED(str)) {
|
switch (encindex) {
|
||||||
case ENCINDEX_ASCII_8BIT:
|
case ENCINDEX_ASCII_8BIT:
|
||||||
case ENCINDEX_UTF_8:
|
case ENCINDEX_UTF_8:
|
||||||
case ENCINDEX_US_ASCII:
|
case ENCINDEX_US_ASCII:
|
||||||
@ -150,6 +150,12 @@ str_enc_fastpath(VALUE str)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
str_enc_fastpath(VALUE str)
|
||||||
|
{
|
||||||
|
return str_encindex_fastpath(ENCODING_GET_INLINED(str));
|
||||||
|
}
|
||||||
|
|
||||||
#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
|
#define TERM_LEN(str) (str_enc_fastpath(str) ? 1 : rb_enc_mbminlen(rb_enc_from_index(ENCODING_GET(str))))
|
||||||
#define TERM_FILL(ptr, termlen) do {\
|
#define TERM_FILL(ptr, termlen) do {\
|
||||||
char *const term_fill_ptr = (ptr);\
|
char *const term_fill_ptr = (ptr);\
|
||||||
@ -862,16 +868,24 @@ rb_enc_str_coderange(VALUE str)
|
|||||||
return cr;
|
return cr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline bool
|
||||||
|
rb_enc_str_asciicompat(VALUE str)
|
||||||
|
{
|
||||||
|
int encindex = ENCODING_GET_INLINED(str);
|
||||||
|
return str_encindex_fastpath(encindex) || rb_enc_asciicompat(rb_enc_get_from_index(encindex));
|
||||||
|
}
|
||||||
|
|
||||||
int
|
int
|
||||||
rb_enc_str_asciionly_p(VALUE str)
|
rb_enc_str_asciionly_p(VALUE str)
|
||||||
{
|
{
|
||||||
rb_encoding *enc = STR_ENC_GET(str);
|
switch(ENC_CODERANGE(str)) {
|
||||||
|
case ENC_CODERANGE_UNKNOWN:
|
||||||
if (!rb_enc_asciicompat(enc))
|
return rb_enc_str_asciicompat(str) && is_ascii_string(str);
|
||||||
return FALSE;
|
case ENC_CODERANGE_7BIT:
|
||||||
else if (is_ascii_string(str))
|
return true;
|
||||||
return TRUE;
|
default:
|
||||||
return FALSE;
|
return false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void
|
static inline void
|
||||||
|
Loading…
x
Reference in New Issue
Block a user