From 081689b9e2cac65b664117802d01824303ae83c5 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 31 Oct 2024 14:43:19 +0100 Subject: [PATCH] [ruby/json] parser.rl: extract `build_string` https://github.com/ruby/json/commit/7e557ee291 --- ext/json/parser/parser.c | 107 ++++++++++++++++++++------------------ ext/json/parser/parser.rl | 73 ++++++++++++++------------ 2 files changed, 97 insertions(+), 83 deletions(-) diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 1c42ee3868..62f2903326 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -1450,6 +1450,43 @@ case 16: } } +static inline VALUE build_string(const char *buffer, const char *bufferStart, bool intern, bool symbolize) +{ + if (symbolize) { + intern = true; + } + VALUE result; +# ifdef HAVE_RB_ENC_INTERNED_STR + if (intern) { + result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding()); + } else { + result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); + } +# else + result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); + if (intern) { + # if STR_UMINUS_DEDUPE_FROZEN + // Starting from MRI 3.0 it is preferable to freeze the string + // before deduplication so that it can be interned directly + // otherwise it would be duplicated first which is wasteful. + result = rb_funcall(rb_str_freeze(result), i_uminus, 0); + # elif STR_UMINUS_DEDUPE + // MRI 2.5 and older do not deduplicate strings that are already + // frozen. + result = rb_funcall(result, i_uminus, 0); + # else + result = rb_str_freeze(result); + # endif + } +# endif + + if (symbolize) { + result = rb_str_intern(result); + } + + return result; +} + static const size_t MAX_STACK_BUFFER_SIZE = 128; static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize) { @@ -1561,47 +1598,17 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int buffer += pe - p; } -# ifdef HAVE_RB_ENC_INTERNED_STR - if (intern) { - result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding()); - } else { - result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); - } - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } -# else - result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); + result = build_string(buffer, bufferStart, intern, symbolize); - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } - - if (intern) { - # if STR_UMINUS_DEDUPE_FROZEN - // Starting from MRI 2.8 it is preferable to freeze the string - // before deduplication so that it can be interned directly - // otherwise it would be duplicated first which is wasteful. - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - # elif STR_UMINUS_DEDUPE - // MRI 2.5 and older do not deduplicate strings that are already - // frozen. - result = rb_funcall(result, i_uminus, 0); - # else - result = rb_str_freeze(result); - # endif - } -# endif - - if (symbolize) { - result = rb_str_intern(result); + if (bufferSize > MAX_STACK_BUFFER_SIZE) { + ruby_xfree(bufferStart); } return result; } -#line 1605 "parser.c" +#line 1612 "parser.c" enum {JSON_string_start = 1}; enum {JSON_string_first_final = 8}; enum {JSON_string_error = 0}; @@ -1609,7 +1616,7 @@ enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 633 "parser.rl" +#line 640 "parser.rl" static int @@ -1630,15 +1637,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE match_string; -#line 1634 "parser.c" +#line 1641 "parser.c" { cs = JSON_string_start; } -#line 653 "parser.rl" +#line 660 "parser.rl" json->memo = p; -#line 1642 "parser.c" +#line 1649 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1663,7 +1670,7 @@ case 2: goto st0; goto st2; tr2: -#line 620 "parser.rl" +#line 627 "parser.rl" { *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); if (NIL_P(*result)) { @@ -1673,14 +1680,14 @@ tr2: {p = (( p + 1))-1;} } } -#line 630 "parser.rl" +#line 637 "parser.rl" { p--; {p++; cs = 8; goto _out;} } goto st8; st8: if ( ++p == pe ) goto _test_eof8; case 8: -#line 1684 "parser.c" +#line 1691 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -1756,7 +1763,7 @@ case 7: _out: {} } -#line 655 "parser.rl" +#line 662 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -1953,7 +1960,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 1957 "parser.c" +#line 1964 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -1961,7 +1968,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 865 "parser.rl" +#line 872 "parser.rl" /* @@ -1979,16 +1986,16 @@ static VALUE cParser_parse(VALUE self) GET_PARSER; -#line 1983 "parser.c" +#line 1990 "parser.c" { cs = JSON_start; } -#line 882 "parser.rl" +#line 889 "parser.rl" p = json->source; pe = p + json->len; -#line 1992 "parser.c" +#line 1999 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2022,7 +2029,7 @@ st0: cs = 0; goto _out; tr2: -#line 857 "parser.rl" +#line 864 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2032,7 +2039,7 @@ st10: if ( ++p == pe ) goto _test_eof10; case 10: -#line 2036 "parser.c" +#line 2043 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2121,7 +2128,7 @@ case 9: _out: {} } -#line 885 "parser.rl" +#line 892 "parser.rl" if (cs >= JSON_first_final && p == pe) { return result; diff --git a/ext/json/parser/parser.rl b/ext/json/parser/parser.rl index 3c42afa254..35a9766ddb 100644 --- a/ext/json/parser/parser.rl +++ b/ext/json/parser/parser.rl @@ -461,6 +461,43 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul } } +static inline VALUE build_string(const char *buffer, const char *bufferStart, bool intern, bool symbolize) +{ + if (symbolize) { + intern = true; + } + VALUE result; +# ifdef HAVE_RB_ENC_INTERNED_STR + if (intern) { + result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding()); + } else { + result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); + } +# else + result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); + if (intern) { + # if STR_UMINUS_DEDUPE_FROZEN + // Starting from MRI 3.0 it is preferable to freeze the string + // before deduplication so that it can be interned directly + // otherwise it would be duplicated first which is wasteful. + result = rb_funcall(rb_str_freeze(result), i_uminus, 0); + # elif STR_UMINUS_DEDUPE + // MRI 2.5 and older do not deduplicate strings that are already + // frozen. + result = rb_funcall(result, i_uminus, 0); + # else + result = rb_str_freeze(result); + # endif + } +# endif + + if (symbolize) { + result = rb_str_intern(result); + } + + return result; +} + static const size_t MAX_STACK_BUFFER_SIZE = 128; static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize) { @@ -572,40 +609,10 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int buffer += pe - p; } -# ifdef HAVE_RB_ENC_INTERNED_STR - if (intern) { - result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding()); - } else { - result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); - } - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } -# else - result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart)); + result = build_string(buffer, bufferStart, intern, symbolize); - if (bufferSize > MAX_STACK_BUFFER_SIZE) { - ruby_xfree(bufferStart); - } - - if (intern) { - # if STR_UMINUS_DEDUPE_FROZEN - // Starting from MRI 2.8 it is preferable to freeze the string - // before deduplication so that it can be interned directly - // otherwise it would be duplicated first which is wasteful. - result = rb_funcall(rb_str_freeze(result), i_uminus, 0); - # elif STR_UMINUS_DEDUPE - // MRI 2.5 and older do not deduplicate strings that are already - // frozen. - result = rb_funcall(result, i_uminus, 0); - # else - result = rb_str_freeze(result); - # endif - } -# endif - - if (symbolize) { - result = rb_str_intern(result); + if (bufferSize > MAX_STACK_BUFFER_SIZE) { + ruby_xfree(bufferStart); } return result;