[ruby/json] parser.rl: parse_string implement a fast path
If we assume most strings don't contain any escape sequences, we can avoid a lot of costly operations when that holds true. Before: ``` == Parsing activitypub.json (58160 bytes) ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. https://github.com/ruby/json/commit/7943f98a8a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 884.000 i/100ms oj 789.000 i/100ms Oj::Parser 943.000 i/100ms rapidjson 584.000 i/100ms Calculating ------------------------------------- json 8.897k (± 1.3%) i/s (112.40 μs/i) - 45.084k in 5.068520s oj 7.967k (± 1.5%) i/s (125.52 μs/i) - 40.239k in 5.051985s Oj::Parser 9.564k (± 1.4%) i/s (104.56 μs/i) - 48.093k in 5.029626s rapidjson 5.947k (± 1.4%) i/s (168.16 μs/i) - 29.784k in 5.009437s Comparison: json: 8896.5 i/s Oj::Parser: 9563.8 i/s - 1.08x faster oj: 7966.8 i/s - 1.12x slower rapidjson: 5946.7 i/s - 1.50x slower == Parsing twitter.json (567916 bytes) ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. https://github.com/ruby/json/commit/7943f98a8a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 83.000 i/100ms oj 64.000 i/100ms Oj::Parser 77.000 i/100ms rapidjson 54.000 i/100ms Calculating ------------------------------------- json 823.083 (± 1.8%) i/s (1.21 ms/i) - 4.150k in 5.043805s oj 632.538 (± 1.4%) i/s (1.58 ms/i) - 3.200k in 5.060073s Oj::Parser 769.122 (± 1.8%) i/s (1.30 ms/i) - 3.850k in 5.007501s rapidjson 548.494 (± 1.5%) i/s (1.82 ms/i) - 2.754k in 5.022153s Comparison: json: 823.1 i/s Oj::Parser: 769.1 i/s - 1.07x slower oj: 632.5 i/s - 1.30x slower rapidjson: 548.5 i/s - 1.50x slower == Parsing citm_catalog.json (1727030 bytes) ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. 
https://github.com/ruby/json/commit/7943f98a8a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 41.000 i/100ms oj 34.000 i/100ms Oj::Parser 45.000 i/100ms rapidjson 39.000 i/100ms Calculating ------------------------------------- json 427.162 (± 1.2%) i/s (2.34 ms/i) - 2.173k in 5.087666s oj 351.463 (± 2.8%) i/s (2.85 ms/i) - 1.768k in 5.035149s Oj::Parser 461.849 (± 3.7%) i/s (2.17 ms/i) - 2.340k in 5.074461s rapidjson 395.155 (± 1.8%) i/s (2.53 ms/i) - 1.989k in 5.034927s Comparison: json: 427.2 i/s Oj::Parser: 461.8 i/s - 1.08x faster rapidjson: 395.2 i/s - 1.08x slower oj: 351.5 i/s - 1.22x slower ``` After: ``` == Parsing activitypub.json (58160 bytes) ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. https://github.com/ruby/json/commit/7943f98a8a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 953.000 i/100ms oj 813.000 i/100ms Oj::Parser 956.000 i/100ms rapidjson 563.000 i/100ms Calculating ------------------------------------- json 9.525k (± 1.2%) i/s (104.98 μs/i) - 47.650k in 5.003252s oj 8.117k (± 0.5%) i/s (123.20 μs/i) - 40.650k in 5.008283s Oj::Parser 9.590k (± 3.2%) i/s (104.27 μs/i) - 48.756k in 5.089794s rapidjson 6.020k (± 0.9%) i/s (166.10 μs/i) - 30.402k in 5.050155s Comparison: json: 9525.3 i/s Oj::Parser: 9590.1 i/s - same-ish: difference falls within error oj: 8116.7 i/s - 1.17x slower rapidjson: 6020.5 i/s - 1.58x slower == Parsing twitter.json (567916 bytes) ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. 
https://github.com/ruby/json/commit/7943f98a8a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 87.000 i/100ms oj 64.000 i/100ms Oj::Parser 75.000 i/100ms rapidjson 55.000 i/100ms Calculating ------------------------------------- json 866.563 (± 0.8%) i/s (1.15 ms/i) - 4.350k in 5.020138s oj 643.567 (± 0.8%) i/s (1.55 ms/i) - 3.264k in 5.072101s Oj::Parser 777.346 (± 3.5%) i/s (1.29 ms/i) - 3.900k in 5.023933s rapidjson 557.158 (± 0.7%) i/s (1.79 ms/i) - 2.805k in 5.034731s Comparison: json: 866.6 i/s Oj::Parser: 777.3 i/s - 1.11x slower oj: 643.6 i/s - 1.35x slower rapidjson: 557.2 i/s - 1.56x slower == Parsing citm_catalog.json (1727030 bytes) ruby 3.4.0dev (2024-11-06T07:59:09Z precompute-hash-wh.. https://github.com/ruby/json/commit/7943f98a8a) +YJIT +PRISM [arm64-darwin24] Warming up -------------------------------------- json 41.000 i/100ms oj 35.000 i/100ms Oj::Parser 40.000 i/100ms rapidjson 39.000 i/100ms Calculating ------------------------------------- json 429.216 (± 1.2%) i/s (2.33 ms/i) - 2.173k in 5.063351s oj 354.755 (± 1.1%) i/s (2.82 ms/i) - 1.785k in 5.032374s Oj::Parser 465.114 (± 3.7%) i/s (2.15 ms/i) - 2.360k in 5.081634s rapidjson 387.135 (± 1.3%) i/s (2.58 ms/i) - 1.950k in 5.037787s Comparison: json: 429.2 i/s Oj::Parser: 465.1 i/s - 1.08x faster rapidjson: 387.1 i/s - 1.11x slower oj: 354.8 i/s - 1.21x slower ``` https://github.com/ruby/json/commit/96bd97c61e
This commit is contained in:
parent
8254f6492c
commit
6cea370b23
@ -2302,6 +2302,26 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
 * Builds the Ruby value for a JSON string whose raw bytes can be used
 * verbatim, i.e. without running the unescaping logic.
 *
 * string / stringEnd  delimit the bytes of the string body.
 * is_name             when set, json->name_cache is consulted first: the
 *                     symbol cache if symbolize is set, the string cache
 *                     otherwise. A non-zero result is returned directly.
 * intern / symbolize  forwarded unchanged to build_string().
 *
 * When is_name is not set, or the cache lookup yields a zero value, the
 * result comes from build_string().
 */
static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
{
    size_t length;
    VALUE key;

    /* Only names go through the cache; plain values are built right away. */
    if (!is_name) {
        return build_string(string, stringEnd, intern, symbolize);
    }

    length = (size_t)(stringEnd - string);
    key = RB_UNLIKELY(symbolize)
        ? rsymbol_cache_fetch(&json->name_cache, string, length)
        : rstring_cache_fetch(&json->name_cache, string, length);

    if (RB_LIKELY(key)) {
        return key;
    }

    /* Cache produced nothing usable: fall back to a regular build. */
    return build_string(string, stringEnd, intern, symbolize);
}
|
||||
|
||||
static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
|
||||
{
|
||||
size_t bufferSize = stringEnd - string;
|
||||
@ -2323,7 +2343,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE
|
||||
}
|
||||
|
||||
pe = memchr(p, '\\', bufferSize);
|
||||
if (RB_LIKELY(pe == NULL)) {
|
||||
if (RB_UNLIKELY(pe == NULL)) {
|
||||
return build_string(string, stringEnd, intern, symbolize);
|
||||
}
|
||||
|
||||
@ -2424,15 +2444,15 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE
|
||||
}
|
||||
|
||||
|
||||
#line 2428 "parser.c"
|
||||
#line 2448 "parser.c"
|
||||
enum {JSON_string_start = 1};
|
||||
enum {JSON_string_first_final = 8};
|
||||
enum {JSON_string_first_final = 9};
|
||||
enum {JSON_string_error = 0};
|
||||
|
||||
enum {JSON_string_en_main = 1};
|
||||
|
||||
|
||||
#line 1019 "parser.rl"
|
||||
#line 1051 "parser.rl"
|
||||
|
||||
|
||||
static int
|
||||
@ -2453,15 +2473,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
|
||||
VALUE match_string;
|
||||
|
||||
|
||||
#line 2457 "parser.c"
|
||||
#line 2477 "parser.c"
|
||||
{
|
||||
cs = JSON_string_start;
|
||||
}
|
||||
|
||||
#line 1039 "parser.rl"
|
||||
#line 1071 "parser.rl"
|
||||
json->memo = p;
|
||||
|
||||
#line 2465 "parser.c"
|
||||
#line 2485 "parser.c"
|
||||
{
|
||||
if ( p == pe )
|
||||
goto _test_eof;
|
||||
@ -2486,47 +2506,56 @@ case 2:
|
||||
goto st0;
|
||||
goto st2;
|
||||
tr2:
|
||||
#line 1006 "parser.rl"
|
||||
#line 1033 "parser.rl"
|
||||
{
|
||||
*result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
|
||||
{p = (( p + 1))-1;}
|
||||
p--;
|
||||
{p++; cs = 9; goto _out;}
|
||||
}
|
||||
#line 1026 "parser.rl"
|
||||
{
|
||||
*result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
|
||||
if (NIL_P(*result)) {
|
||||
p--;
|
||||
{p++; cs = 8; goto _out;}
|
||||
} else {
|
||||
{p = (( p + 1))-1;}
|
||||
}
|
||||
{p = (( p + 1))-1;}
|
||||
p--;
|
||||
{p++; cs = 9; goto _out;}
|
||||
}
|
||||
#line 1016 "parser.rl"
|
||||
{ p--; {p++; cs = 8; goto _out;} }
|
||||
goto st8;
|
||||
st8:
|
||||
goto st9;
|
||||
tr6:
|
||||
#line 1026 "parser.rl"
|
||||
{
|
||||
*result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
|
||||
{p = (( p + 1))-1;}
|
||||
p--;
|
||||
{p++; cs = 9; goto _out;}
|
||||
}
|
||||
goto st9;
|
||||
st9:
|
||||
if ( ++p == pe )
|
||||
goto _test_eof8;
|
||||
case 8:
|
||||
#line 2507 "parser.c"
|
||||
goto _test_eof9;
|
||||
case 9:
|
||||
#line 2538 "parser.c"
|
||||
goto st0;
|
||||
st3:
|
||||
if ( ++p == pe )
|
||||
goto _test_eof3;
|
||||
case 3:
|
||||
if ( (*p) == 117 )
|
||||
goto st4;
|
||||
goto st5;
|
||||
if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 )
|
||||
goto st0;
|
||||
goto st2;
|
||||
goto st4;
|
||||
st4:
|
||||
if ( ++p == pe )
|
||||
goto _test_eof4;
|
||||
case 4:
|
||||
if ( (*p) < 65 ) {
|
||||
if ( 48 <= (*p) && (*p) <= 57 )
|
||||
goto st5;
|
||||
} else if ( (*p) > 70 ) {
|
||||
if ( 97 <= (*p) && (*p) <= 102 )
|
||||
goto st5;
|
||||
} else
|
||||
goto st5;
|
||||
goto st0;
|
||||
switch( (*p) ) {
|
||||
case 34: goto tr6;
|
||||
case 92: goto st3;
|
||||
}
|
||||
if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 )
|
||||
goto st0;
|
||||
goto st4;
|
||||
st5:
|
||||
if ( ++p == pe )
|
||||
goto _test_eof5;
|
||||
@ -2559,27 +2588,41 @@ st7:
|
||||
case 7:
|
||||
if ( (*p) < 65 ) {
|
||||
if ( 48 <= (*p) && (*p) <= 57 )
|
||||
goto st2;
|
||||
goto st8;
|
||||
} else if ( (*p) > 70 ) {
|
||||
if ( 97 <= (*p) && (*p) <= 102 )
|
||||
goto st2;
|
||||
goto st8;
|
||||
} else
|
||||
goto st2;
|
||||
goto st8;
|
||||
goto st0;
|
||||
st8:
|
||||
if ( ++p == pe )
|
||||
goto _test_eof8;
|
||||
case 8:
|
||||
if ( (*p) < 65 ) {
|
||||
if ( 48 <= (*p) && (*p) <= 57 )
|
||||
goto st4;
|
||||
} else if ( (*p) > 70 ) {
|
||||
if ( 97 <= (*p) && (*p) <= 102 )
|
||||
goto st4;
|
||||
} else
|
||||
goto st4;
|
||||
goto st0;
|
||||
}
|
||||
_test_eof2: cs = 2; goto _test_eof;
|
||||
_test_eof8: cs = 8; goto _test_eof;
|
||||
_test_eof9: cs = 9; goto _test_eof;
|
||||
_test_eof3: cs = 3; goto _test_eof;
|
||||
_test_eof4: cs = 4; goto _test_eof;
|
||||
_test_eof5: cs = 5; goto _test_eof;
|
||||
_test_eof6: cs = 6; goto _test_eof;
|
||||
_test_eof7: cs = 7; goto _test_eof;
|
||||
_test_eof8: cs = 8; goto _test_eof;
|
||||
|
||||
_test_eof: {}
|
||||
_out: {}
|
||||
}
|
||||
|
||||
#line 1041 "parser.rl"
|
||||
#line 1073 "parser.rl"
|
||||
|
||||
if (json->create_additions && RTEST(match_string = json->match_string)) {
|
||||
VALUE klass;
|
||||
@ -2732,7 +2775,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
|
||||
}
|
||||
|
||||
|
||||
#line 2736 "parser.c"
|
||||
#line 2779 "parser.c"
|
||||
enum {JSON_start = 1};
|
||||
enum {JSON_first_final = 10};
|
||||
enum {JSON_error = 0};
|
||||
@ -2740,7 +2783,7 @@ enum {JSON_error = 0};
|
||||
enum {JSON_en_main = 1};
|
||||
|
||||
|
||||
#line 1207 "parser.rl"
|
||||
#line 1239 "parser.rl"
|
||||
|
||||
|
||||
/*
|
||||
@ -2769,16 +2812,16 @@ static VALUE cParser_parse(VALUE self)
|
||||
json->stack = &stack;
|
||||
|
||||
|
||||
#line 2773 "parser.c"
|
||||
#line 2816 "parser.c"
|
||||
{
|
||||
cs = JSON_start;
|
||||
}
|
||||
|
||||
#line 1235 "parser.rl"
|
||||
#line 1267 "parser.rl"
|
||||
p = json->source;
|
||||
pe = p + json->len;
|
||||
|
||||
#line 2782 "parser.c"
|
||||
#line 2825 "parser.c"
|
||||
{
|
||||
if ( p == pe )
|
||||
goto _test_eof;
|
||||
@ -2812,7 +2855,7 @@ st0:
|
||||
cs = 0;
|
||||
goto _out;
|
||||
tr2:
|
||||
#line 1199 "parser.rl"
|
||||
#line 1231 "parser.rl"
|
||||
{
|
||||
char *np = JSON_parse_value(json, p, pe, &result, 0);
|
||||
if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
|
||||
@ -2822,7 +2865,7 @@ st10:
|
||||
if ( ++p == pe )
|
||||
goto _test_eof10;
|
||||
case 10:
|
||||
#line 2826 "parser.c"
|
||||
#line 2869 "parser.c"
|
||||
switch( (*p) ) {
|
||||
case 13: goto st10;
|
||||
case 32: goto st10;
|
||||
@ -2911,7 +2954,7 @@ case 9:
|
||||
_out: {}
|
||||
}
|
||||
|
||||
#line 1238 "parser.rl"
|
||||
#line 1270 "parser.rl"
|
||||
|
||||
if (json->stack_handle) {
|
||||
rvalue_stack_eagerly_release(json->stack_handle);
|
||||
@ -2947,16 +2990,16 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts)
|
||||
json->stack = &stack;
|
||||
|
||||
|
||||
#line 2951 "parser.c"
|
||||
#line 2994 "parser.c"
|
||||
{
|
||||
cs = JSON_start;
|
||||
}
|
||||
|
||||
#line 1273 "parser.rl"
|
||||
#line 1305 "parser.rl"
|
||||
p = json->source;
|
||||
pe = p + json->len;
|
||||
|
||||
#line 2960 "parser.c"
|
||||
#line 3003 "parser.c"
|
||||
{
|
||||
if ( p == pe )
|
||||
goto _test_eof;
|
||||
@ -2990,7 +3033,7 @@ st0:
|
||||
cs = 0;
|
||||
goto _out;
|
||||
tr2:
|
||||
#line 1199 "parser.rl"
|
||||
#line 1231 "parser.rl"
|
||||
{
|
||||
char *np = JSON_parse_value(json, p, pe, &result, 0);
|
||||
if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
|
||||
@ -3000,7 +3043,7 @@ st10:
|
||||
if ( ++p == pe )
|
||||
goto _test_eof10;
|
||||
case 10:
|
||||
#line 3004 "parser.c"
|
||||
#line 3047 "parser.c"
|
||||
switch( (*p) ) {
|
||||
case 13: goto st10;
|
||||
case 32: goto st10;
|
||||
@ -3089,7 +3132,7 @@ case 9:
|
||||
_out: {}
|
||||
}
|
||||
|
||||
#line 1276 "parser.rl"
|
||||
#line 1308 "parser.rl"
|
||||
|
||||
if (json->stack_handle) {
|
||||
rvalue_stack_eagerly_release(json->stack_handle);
|
||||
|
@ -658,10 +658,10 @@ main := ignore* (
|
||||
Vtrue @parse_true |
|
||||
VNaN @parse_nan |
|
||||
VInfinity @parse_infinity |
|
||||
begin_number >parse_number |
|
||||
begin_string >parse_string |
|
||||
begin_array >parse_array |
|
||||
begin_object >parse_object
|
||||
begin_number @parse_number |
|
||||
begin_string @parse_string |
|
||||
begin_array @parse_array |
|
||||
begin_object @parse_object
|
||||
) ignore* %*exit;
|
||||
}%%
|
||||
|
||||
@ -876,6 +876,26 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
 * Builds the Ruby value for a JSON string whose raw bytes can be used
 * verbatim, i.e. without running the unescaping logic.
 *
 * string / stringEnd  delimit the bytes of the string body.
 * is_name             when set, json->name_cache is consulted first: the
 *                     symbol cache if symbolize is set, the string cache
 *                     otherwise. A non-zero result is returned directly.
 * intern / symbolize  forwarded unchanged to build_string().
 *
 * When is_name is not set, or the cache lookup yields a zero value, the
 * result comes from build_string().
 */
static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
{
    size_t length;
    VALUE key;

    /* Only names go through the cache; plain values are built right away. */
    if (!is_name) {
        return build_string(string, stringEnd, intern, symbolize);
    }

    length = (size_t)(stringEnd - string);
    key = RB_UNLIKELY(symbolize)
        ? rsymbol_cache_fetch(&json->name_cache, string, length)
        : rstring_cache_fetch(&json->name_cache, string, length);

    if (RB_LIKELY(key)) {
        return key;
    }

    /* Cache produced nothing usable: fall back to a regular build. */
    return build_string(string, stringEnd, intern, symbolize);
}
|
||||
|
||||
static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
|
||||
{
|
||||
size_t bufferSize = stringEnd - string;
|
||||
@ -897,7 +917,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE
|
||||
}
|
||||
|
||||
pe = memchr(p, '\\', bufferSize);
|
||||
if (RB_LIKELY(pe == NULL)) {
|
||||
if (RB_UNLIKELY(pe == NULL)) {
|
||||
return build_string(string, stringEnd, intern, symbolize);
|
||||
}
|
||||
|
||||
@ -1003,19 +1023,31 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE
|
||||
|
||||
write data;
|
||||
|
||||
action parse_string {
|
||||
action parse_complex_string {
|
||||
*result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
|
||||
if (NIL_P(*result)) {
|
||||
fhold;
|
||||
fbreak;
|
||||
} else {
|
||||
fexec p + 1;
|
||||
}
|
||||
fexec p + 1;
|
||||
fhold;
|
||||
fbreak;
|
||||
}
|
||||
|
||||
action exit { fhold; fbreak; }
|
||||
# Attached to the closing double quote of the `simple*` branch of `main`,
# i.e. a string body with no backslash, double quote or control character.
# Hands the bytes after the opening quote (json->memo + 1) up to the current
# position to json_string_fastpath(), stores the value in *result, then
# breaks out of the state machine.
action parse_simple_string {
|
||||
*result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
|
||||
fexec p + 1;
|
||||
fhold;
|
||||
fbreak;
|
||||
}
|
||||
|
||||
main := '"' ((^([\"\\] | 0..0x1f) | '\\'[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit;
|
||||
double_quote = '"';
|
||||
escape = '\\';
|
||||
control = 0..0x1f;
|
||||
simple = any - escape - double_quote - control;
|
||||
|
||||
main := double_quote (
|
||||
(simple*)(
|
||||
(double_quote) @parse_simple_string |
|
||||
((^([\"\\] | control) | escape[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | escape^([\"\\/bfnrtu]|0..0x1f))* double_quote) @parse_complex_string
|
||||
)
|
||||
);
|
||||
}%%
|
||||
|
||||
static int
|
||||
|
Loading…
x
Reference in New Issue
Block a user