[ruby/json] parser.rl: extract build_string

https://github.com/ruby/json/commit/7e557ee291
This commit is contained in:
Jean Boussier 2024-10-31 14:43:19 +01:00 committed by Hiroshi SHIBATA
parent 6412e6f6c3
commit 081689b9e2
2 changed files with 97 additions and 83 deletions

View File

@ -1450,6 +1450,43 @@ case 16:
}
}
static inline VALUE build_string(const char *buffer, const char *bufferStart, bool intern, bool symbolize)
{
if (symbolize) {
intern = true;
}
VALUE result;
# ifdef HAVE_RB_ENC_INTERNED_STR
if (intern) {
result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
} else {
result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
}
# else
result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
if (intern) {
# if STR_UMINUS_DEDUPE_FROZEN
// Starting from MRI 3.0 it is preferable to freeze the string
// before deduplication so that it can be interned directly
// otherwise it would be duplicated first which is wasteful.
result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
# elif STR_UMINUS_DEDUPE
// MRI 2.5 and older do not deduplicate strings that are already
// frozen.
result = rb_funcall(result, i_uminus, 0);
# else
result = rb_str_freeze(result);
# endif
}
# endif
if (symbolize) {
result = rb_str_intern(result);
}
return result;
}
static const size_t MAX_STACK_BUFFER_SIZE = 128;
static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize)
{
@ -1561,47 +1598,17 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
buffer += pe - p;
}
# ifdef HAVE_RB_ENC_INTERNED_STR
if (intern) {
result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
} else {
result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
}
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
ruby_xfree(bufferStart);
}
# else
result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
result = build_string(buffer, bufferStart, intern, symbolize);
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
ruby_xfree(bufferStart);
}
if (intern) {
# if STR_UMINUS_DEDUPE_FROZEN
// Starting from MRI 2.8 it is preferable to freeze the string
// before deduplication so that it can be interned directly
// otherwise it would be duplicated first which is wasteful.
result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
# elif STR_UMINUS_DEDUPE
// MRI 2.5 and older do not deduplicate strings that are already
// frozen.
result = rb_funcall(result, i_uminus, 0);
# else
result = rb_str_freeze(result);
# endif
}
# endif
if (symbolize) {
result = rb_str_intern(result);
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
ruby_xfree(bufferStart);
}
return result;
}
#line 1605 "parser.c"
#line 1612 "parser.c"
enum {JSON_string_start = 1};
enum {JSON_string_first_final = 8};
enum {JSON_string_error = 0};
@ -1609,7 +1616,7 @@ enum {JSON_string_error = 0};
enum {JSON_string_en_main = 1};
#line 633 "parser.rl"
#line 640 "parser.rl"
static int
@ -1630,15 +1637,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
VALUE match_string;
#line 1634 "parser.c"
#line 1641 "parser.c"
{
cs = JSON_string_start;
}
#line 653 "parser.rl"
#line 660 "parser.rl"
json->memo = p;
#line 1642 "parser.c"
#line 1649 "parser.c"
{
if ( p == pe )
goto _test_eof;
@ -1663,7 +1670,7 @@ case 2:
goto st0;
goto st2;
tr2:
#line 620 "parser.rl"
#line 627 "parser.rl"
{
*result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
if (NIL_P(*result)) {
@ -1673,14 +1680,14 @@ tr2:
{p = (( p + 1))-1;}
}
}
#line 630 "parser.rl"
#line 637 "parser.rl"
{ p--; {p++; cs = 8; goto _out;} }
goto st8;
st8:
if ( ++p == pe )
goto _test_eof8;
case 8:
#line 1684 "parser.c"
#line 1691 "parser.c"
goto st0;
st3:
if ( ++p == pe )
@ -1756,7 +1763,7 @@ case 7:
_out: {}
}
#line 655 "parser.rl"
#line 662 "parser.rl"
if (json->create_additions && RTEST(match_string = json->match_string)) {
VALUE klass;
@ -1953,7 +1960,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
}
#line 1957 "parser.c"
#line 1964 "parser.c"
enum {JSON_start = 1};
enum {JSON_first_final = 10};
enum {JSON_error = 0};
@ -1961,7 +1968,7 @@ enum {JSON_error = 0};
enum {JSON_en_main = 1};
#line 865 "parser.rl"
#line 872 "parser.rl"
/*
@ -1979,16 +1986,16 @@ static VALUE cParser_parse(VALUE self)
GET_PARSER;
#line 1983 "parser.c"
#line 1990 "parser.c"
{
cs = JSON_start;
}
#line 882 "parser.rl"
#line 889 "parser.rl"
p = json->source;
pe = p + json->len;
#line 1992 "parser.c"
#line 1999 "parser.c"
{
if ( p == pe )
goto _test_eof;
@ -2022,7 +2029,7 @@ st0:
cs = 0;
goto _out;
tr2:
#line 857 "parser.rl"
#line 864 "parser.rl"
{
char *np = JSON_parse_value(json, p, pe, &result, 0);
if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
@ -2032,7 +2039,7 @@ st10:
if ( ++p == pe )
goto _test_eof10;
case 10:
#line 2036 "parser.c"
#line 2043 "parser.c"
switch( (*p) ) {
case 13: goto st10;
case 32: goto st10;
@ -2121,7 +2128,7 @@ case 9:
_out: {}
}
#line 885 "parser.rl"
#line 892 "parser.rl"
if (cs >= JSON_first_final && p == pe) {
return result;

View File

@ -461,6 +461,43 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul
}
}
static inline VALUE build_string(const char *buffer, const char *bufferStart, bool intern, bool symbolize)
{
if (symbolize) {
intern = true;
}
VALUE result;
# ifdef HAVE_RB_ENC_INTERNED_STR
if (intern) {
result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
} else {
result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
}
# else
result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
if (intern) {
# if STR_UMINUS_DEDUPE_FROZEN
// Starting from MRI 3.0 it is preferable to freeze the string
// before deduplication so that it can be interned directly
// otherwise it would be duplicated first which is wasteful.
result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
# elif STR_UMINUS_DEDUPE
// MRI 2.5 and older do not deduplicate strings that are already
// frozen.
result = rb_funcall(result, i_uminus, 0);
# else
result = rb_str_freeze(result);
# endif
}
# endif
if (symbolize) {
result = rb_str_intern(result);
}
return result;
}
static const size_t MAX_STACK_BUFFER_SIZE = 128;
static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int symbolize)
{
@ -572,40 +609,10 @@ static VALUE json_string_unescape(char *string, char *stringEnd, int intern, int
buffer += pe - p;
}
# ifdef HAVE_RB_ENC_INTERNED_STR
if (intern) {
result = rb_enc_interned_str(bufferStart, (long)(buffer - bufferStart), rb_utf8_encoding());
} else {
result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
}
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
ruby_xfree(bufferStart);
}
# else
result = rb_utf8_str_new(bufferStart, (long)(buffer - bufferStart));
result = build_string(buffer, bufferStart, intern, symbolize);
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
ruby_xfree(bufferStart);
}
if (intern) {
# if STR_UMINUS_DEDUPE_FROZEN
// Starting from MRI 2.8 it is preferable to freeze the string
// before deduplication so that it can be interned directly
// otherwise it would be duplicated first which is wasteful.
result = rb_funcall(rb_str_freeze(result), i_uminus, 0);
# elif STR_UMINUS_DEDUPE
// MRI 2.5 and older do not deduplicate strings that are already
// frozen.
result = rb_funcall(result, i_uminus, 0);
# else
result = rb_str_freeze(result);
# endif
}
# endif
if (symbolize) {
result = rb_str_intern(result);
if (bufferSize > MAX_STACK_BUFFER_SIZE) {
ruby_xfree(bufferStart);
}
return result;