Remove ruby object from string nodes

String nodes hold a ruby string object in `VALUE nd_lit`.
This commit changes it to `struct rb_parser_string *string`
to reduce the dependency on ruby objects.
Sometimes these strings are concatenated with other strings,
therefore string concatenation functions are needed.
This commit is contained in:
yui-knk 2024-02-03 00:50:02 +09:00 committed by Yuichiro Kaneko
parent f7a407cabd
commit 33c1e082d0
9 changed files with 598 additions and 180 deletions

5
ast.c
View File

@ -555,9 +555,10 @@ node_children(rb_ast_t *ast, const NODE *node)
return rb_ary_new_from_node_args(ast, 2, RNODE_MATCH3(node)->nd_recv, RNODE_MATCH3(node)->nd_value); return rb_ary_new_from_node_args(ast, 2, RNODE_MATCH3(node)->nd_recv, RNODE_MATCH3(node)->nd_value);
case NODE_MATCH: case NODE_MATCH:
case NODE_LIT: case NODE_LIT:
return rb_ary_new_from_args(1, RNODE_LIT(node)->nd_lit);
case NODE_STR: case NODE_STR:
case NODE_XSTR: case NODE_XSTR:
return rb_ary_new_from_args(1, RNODE_LIT(node)->nd_lit); return rb_ary_new_from_args(1, rb_node_str_string_val(node));
case NODE_INTEGER: case NODE_INTEGER:
return rb_ary_new_from_args(1, rb_node_integer_literal_val(node)); return rb_ary_new_from_args(1, rb_node_integer_literal_val(node));
case NODE_FLOAT: case NODE_FLOAT:
@ -579,7 +580,7 @@ node_children(rb_ast_t *ast, const NODE *node)
head = NEW_CHILD(ast, n->nd_head); head = NEW_CHILD(ast, n->nd_head);
next = NEW_CHILD(ast, n->nd_next); next = NEW_CHILD(ast, n->nd_next);
} }
return rb_ary_new_from_args(3, RNODE_DSTR(node)->nd_lit, head, next); return rb_ary_new_from_args(3, rb_node_dstr_string_val(node), head, next);
} }
case NODE_SYM: case NODE_SYM:
return rb_ary_new_from_args(1, rb_node_sym_string_val(node)); return rb_ary_new_from_args(1, rb_node_sym_string_val(node));

View File

@ -838,7 +838,7 @@ get_string_value(const NODE *node)
{ {
switch (nd_type(node)) { switch (nd_type(node)) {
case NODE_STR: case NODE_STR:
return RNODE_STR(node)->nd_lit; return rb_node_str_string_val(node);
case NODE_FILE: case NODE_FILE:
return rb_node_file_path_val(node); return rb_node_file_path_val(node);
default: default:
@ -4310,7 +4310,7 @@ static int
compile_dstr_fragments(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, int *cntp) compile_dstr_fragments(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, int *cntp)
{ {
const struct RNode_LIST *list = RNODE_DSTR(node)->nd_next; const struct RNode_LIST *list = RNODE_DSTR(node)->nd_next;
VALUE lit = RNODE_DSTR(node)->nd_lit; VALUE lit = rb_node_dstr_string_val(node);
LINK_ELEMENT *first_lit = 0; LINK_ELEMENT *first_lit = 0;
int cnt = 0; int cnt = 0;
@ -4331,7 +4331,7 @@ compile_dstr_fragments(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *cons
while (list) { while (list) {
const NODE *const head = list->nd_head; const NODE *const head = list->nd_head;
if (nd_type_p(head, NODE_STR)) { if (nd_type_p(head, NODE_STR)) {
lit = rb_fstring(RNODE_STR(head)->nd_lit); lit = rb_fstring(rb_node_str_string_val(head));
ADD_INSN1(ret, head, putobject, lit); ADD_INSN1(ret, head, putobject, lit);
RB_OBJ_WRITTEN(iseq, Qundef, lit); RB_OBJ_WRITTEN(iseq, Qundef, lit);
lit = Qnil; lit = Qnil;
@ -4370,7 +4370,7 @@ compile_dstr(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node)
{ {
int cnt; int cnt;
if (!RNODE_DSTR(node)->nd_next) { if (!RNODE_DSTR(node)->nd_next) {
VALUE lit = rb_fstring(RNODE_DSTR(node)->nd_lit); VALUE lit = rb_fstring(rb_node_dstr_string_val(node));
ADD_INSN1(ret, node, putstring, lit); ADD_INSN1(ret, node, putstring, lit);
RB_OBJ_WRITTEN(iseq, Qundef, lit); RB_OBJ_WRITTEN(iseq, Qundef, lit);
} }
@ -4387,15 +4387,14 @@ compile_dregx(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, i
int cnt; int cnt;
if (!RNODE_DREGX(node)->nd_next) { if (!RNODE_DREGX(node)->nd_next) {
VALUE match = RNODE_DREGX(node)->nd_lit;
if (RB_TYPE_P(match, T_REGEXP)) {
if (!popped) { if (!popped) {
VALUE src = rb_node_dregx_string_val(node);
VALUE match = rb_reg_compile(src, (int)RNODE_DREGX(node)->nd_cflag, NULL, 0);
ADD_INSN1(ret, node, putobject, match); ADD_INSN1(ret, node, putobject, match);
RB_OBJ_WRITTEN(iseq, Qundef, match); RB_OBJ_WRITTEN(iseq, Qundef, match);
} }
return COMPILE_OK; return COMPILE_OK;
} }
}
CHECK(compile_dstr_fragments(iseq, ret, node, &cnt)); CHECK(compile_dstr_fragments(iseq, ret, node, &cnt));
ADD_INSN2(ret, node, toregexp, INT2FIX(RNODE_DREGX(node)->nd_cflag), INT2FIX(cnt)); ADD_INSN2(ret, node, toregexp, INT2FIX(RNODE_DREGX(node)->nd_cflag), INT2FIX(cnt));
@ -5135,7 +5134,7 @@ rb_node_case_when_optimizable_literal(const NODE *const node)
case NODE_LINE: case NODE_LINE:
return rb_node_line_lineno_val(node); return rb_node_line_lineno_val(node);
case NODE_STR: case NODE_STR:
return rb_fstring(RNODE_STR(node)->nd_lit); return rb_fstring(rb_node_str_string_val(node));
case NODE_FILE: case NODE_FILE:
return rb_fstring(rb_node_file_path_val(node)); return rb_fstring(rb_node_file_path_val(node));
} }
@ -10364,7 +10363,7 @@ iseq_compile_each0(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const no
} }
case NODE_XSTR:{ case NODE_XSTR:{
ADD_CALL_RECEIVER(ret, node); ADD_CALL_RECEIVER(ret, node);
VALUE str = rb_fstring(RNODE_XSTR(node)->nd_lit); VALUE str = rb_fstring(rb_node_str_string_val(node));
ADD_INSN1(ret, node, putobject, str); ADD_INSN1(ret, node, putobject, str);
RB_OBJ_WRITTEN(iseq, Qundef, str); RB_OBJ_WRITTEN(iseq, Qundef, str);
ADD_CALL(ret, node, idBackquote, INT2FIX(1)); ADD_CALL(ret, node, idBackquote, INT2FIX(1));

View File

@ -76,7 +76,10 @@ RUBY_SYMBOL_EXPORT_BEGIN
VALUE rb_str_new_parser_string(rb_parser_string_t *str); VALUE rb_str_new_parser_string(rb_parser_string_t *str);
RUBY_SYMBOL_EXPORT_END RUBY_SYMBOL_EXPORT_END
VALUE rb_node_str_string_val(const NODE *);
VALUE rb_node_sym_string_val(const NODE *); VALUE rb_node_sym_string_val(const NODE *);
VALUE rb_node_dstr_string_val(const NODE *);
VALUE rb_node_dregx_string_val(const NODE *);
VALUE rb_node_line_lineno_val(const NODE *); VALUE rb_node_line_lineno_val(const NODE *);
VALUE rb_node_file_path_val(const NODE *); VALUE rb_node_file_path_val(const NODE *);
VALUE rb_node_encoding_val(const NODE *); VALUE rb_node_encoding_val(const NODE *);

38
node.c
View File

@ -172,6 +172,8 @@ struct rb_ast_local_table_link {
static void static void
parser_string_free(rb_ast_t *ast, rb_parser_string_t *str) parser_string_free(rb_ast_t *ast, rb_parser_string_t *str)
{ {
if (!str) return;
xfree(str->ptr);
xfree(str); xfree(str);
} }
@ -179,9 +181,27 @@ static void
free_ast_value(rb_ast_t *ast, void *ctx, NODE *node) free_ast_value(rb_ast_t *ast, void *ctx, NODE *node)
{ {
switch (nd_type(node)) { switch (nd_type(node)) {
case NODE_STR:
parser_string_free(ast, RNODE_STR(node)->string);
break;
case NODE_DSTR:
parser_string_free(ast, RNODE_DSTR(node)->string);
break;
case NODE_XSTR:
parser_string_free(ast, RNODE_XSTR(node)->string);
break;
case NODE_DXSTR:
parser_string_free(ast, RNODE_DXSTR(node)->string);
break;
case NODE_SYM: case NODE_SYM:
parser_string_free(ast, RNODE_SYM(node)->string); parser_string_free(ast, RNODE_SYM(node)->string);
break; break;
case NODE_DSYM:
parser_string_free(ast, RNODE_DSYM(node)->string);
break;
case NODE_DREGX:
parser_string_free(ast, RNODE_DREGX(node)->string);
break;
case NODE_FILE: case NODE_FILE:
parser_string_free(ast, RNODE_FILE(node)->path); parser_string_free(ast, RNODE_FILE(node)->path);
break; break;
@ -251,12 +271,6 @@ nodetype_markable_p(enum node_type type)
switch (type) { switch (type) {
case NODE_MATCH: case NODE_MATCH:
case NODE_LIT: case NODE_LIT:
case NODE_STR:
case NODE_XSTR:
case NODE_DSTR:
case NODE_DXSTR:
case NODE_DREGX:
case NODE_DSYM:
return true; return true;
default: default:
return false; return false;
@ -363,12 +377,6 @@ mark_ast_value(rb_ast_t *ast, void *ctx, NODE *node)
switch (nd_type(node)) { switch (nd_type(node)) {
case NODE_MATCH: case NODE_MATCH:
case NODE_LIT: case NODE_LIT:
case NODE_STR:
case NODE_XSTR:
case NODE_DSTR:
case NODE_DXSTR:
case NODE_DREGX:
case NODE_DSYM:
rb_gc_mark_movable(RNODE_LIT(node)->nd_lit); rb_gc_mark_movable(RNODE_LIT(node)->nd_lit);
break; break;
default: default:
@ -386,12 +394,6 @@ update_ast_value(rb_ast_t *ast, void *ctx, NODE *node)
switch (nd_type(node)) { switch (nd_type(node)) {
case NODE_MATCH: case NODE_MATCH:
case NODE_LIT: case NODE_LIT:
case NODE_STR:
case NODE_XSTR:
case NODE_DSTR:
case NODE_DXSTR:
case NODE_DREGX:
case NODE_DSYM:
RNODE_LIT(node)->nd_lit = rb_gc_location(RNODE_LIT(node)->nd_lit); RNODE_LIT(node)->nd_lit = rb_gc_location(RNODE_LIT(node)->nd_lit);
break; break;
default: default:

View File

@ -707,18 +707,19 @@ dump_node(VALUE buf, VALUE indent, int comment, const NODE * node)
ANN("literal"); ANN("literal");
ANN("format: [nd_lit]"); ANN("format: [nd_lit]");
ANN("example: :sym, /foo/"); ANN("example: :sym, /foo/");
goto lit; F_LIT(nd_lit, RNODE_LIT, "literal");
return;
case NODE_STR: case NODE_STR:
ANN("string literal"); ANN("string literal");
ANN("format: [nd_lit]"); ANN("format: [nd_lit]");
ANN("example: 'foo'"); ANN("example: 'foo'");
goto lit; goto str;
case NODE_XSTR: case NODE_XSTR:
ANN("xstring literal"); ANN("xstring literal");
ANN("format: [nd_lit]"); ANN("format: [nd_lit]");
ANN("example: `foo`"); ANN("example: `foo`");
lit: str:
F_LIT(nd_lit, RNODE_LIT, "literal"); F_VALUE(string, rb_node_str_string_val(node), "literal");
return; return;
case NODE_INTEGER: case NODE_INTEGER:
@ -777,7 +778,7 @@ dump_node(VALUE buf, VALUE indent, int comment, const NODE * node)
ANN("format: [nd_lit]"); ANN("format: [nd_lit]");
ANN("example: :\"foo#{ bar }baz\""); ANN("example: :\"foo#{ bar }baz\"");
dlit: dlit:
F_LIT(nd_lit, RNODE_DSTR, "preceding string"); F_VALUE(string, rb_node_dstr_string_val(node), "preceding string");
if (!RNODE_DSTR(node)->nd_next) return; if (!RNODE_DSTR(node)->nd_next) return;
F_NODE(nd_next->nd_head, RNODE_DSTR, "interpolation"); F_NODE(nd_next->nd_head, RNODE_DSTR, "interpolation");
LAST_NODE; LAST_NODE;

640
parse.y

File diff suppressed because it is too large Load Diff

View File

@ -166,6 +166,12 @@ mbclen_charfound_p(int len)
return MBCLEN_CHARFOUND_P(len); return MBCLEN_CHARFOUND_P(len);
} }
/*
 * Function wrapper around MBCLEN_CHARFOUND_LEN(), so that it can be stored as
 * the `.mbclen_charfound_len` callback of rb_global_parser_config.
 * `len` is passed through unchanged and the macro's result is returned.
 * NOTE(review): `len` is presumably a value returned by enc_precise_mbclen()
 * -- confirm against the callers.
 */
static int
mbclen_charfound_len(int len)
{
return MBCLEN_CHARFOUND_LEN(len);
}
static const char * static const char *
enc_name(void *enc) enc_name(void *enc)
{ {
@ -598,6 +604,7 @@ static const rb_parser_config_t rb_global_parser_config = {
.enc_isalnum = enc_isalnum, .enc_isalnum = enc_isalnum,
.enc_precise_mbclen = enc_precise_mbclen, .enc_precise_mbclen = enc_precise_mbclen,
.mbclen_charfound_p = mbclen_charfound_p, .mbclen_charfound_p = mbclen_charfound_p,
.mbclen_charfound_len = mbclen_charfound_len,
.enc_name = enc_name, .enc_name = enc_name,
.enc_prev_char = enc_prev_char, .enc_prev_char = enc_prev_char,
.enc_get = enc_get, .enc_get = enc_get,
@ -988,6 +995,13 @@ rb_node_imaginary_literal_val(const NODE *n)
return lit; return lit;
} }
/*
 * Returns a Ruby VALUE for the parser string held by a string node.
 * Reads `RNODE_STR(node)->string` and hands it to rb_str_new_parser_string();
 * the pointer is not checked for NULL here.
 * It is also called on NODE_XSTR nodes (RNode_STR and RNode_XSTR are meant to
 * be the same structure).
 */
VALUE
rb_node_str_string_val(const NODE *node)
{
rb_parser_string_t *str = RNODE_STR(node)->string;
return rb_str_new_parser_string(str);
}
VALUE VALUE
rb_node_sym_string_val(const NODE *node) rb_node_sym_string_val(const NODE *node)
{ {
@ -995,6 +1009,20 @@ rb_node_sym_string_val(const NODE *node)
return ID2SYM(rb_intern3(str->ptr, str->len, str->enc)); return ID2SYM(rb_intern3(str->ptr, str->len, str->enc));
} }
/*
 * Returns a Ruby VALUE for the leading string part of a NODE_DSTR node.
 * When `RNODE_DSTR(node)->string` is NULL, Qnil is returned; otherwise the
 * result of rb_str_new_parser_string() on that parser string.
 */
VALUE
rb_node_dstr_string_val(const NODE *node)
{
rb_parser_string_t *str = RNODE_DSTR(node)->string;
return str ? rb_str_new_parser_string(str) : Qnil;
}
/*
 * Returns a Ruby VALUE for the parser string held by a NODE_DREGX node.
 * Reads `RNODE_DREGX(node)->string` and hands it to rb_str_new_parser_string().
 * Unlike rb_node_dstr_string_val(), there is no NULL check here.
 * NOTE(review): presumably `string` is always set when this is called
 * -- confirm against the parser.
 */
VALUE
rb_node_dregx_string_val(const NODE *node)
{
rb_parser_string_t *str = RNODE_DREGX(node)->string;
return rb_str_new_parser_string(str);
}
VALUE VALUE
rb_node_line_lineno_val(const NODE *node) rb_node_line_lineno_val(const NODE *node)
{ {

View File

@ -37,12 +37,21 @@
/* /*
* Parser String * Parser String
*/ */
/* Code range state of a parser string; stored in rb_parser_string.coderange. */
enum rb_parser_string_coderange_type {
/** The object's coderange is unclear yet. */
RB_PARSER_ENC_CODERANGE_UNKNOWN = 0,
/* NOTE(review): the values below presumably carry the same meaning as Ruby's
 * ENC_CODERANGE_7BIT / _VALID / _BROKEN -- confirm. */
RB_PARSER_ENC_CODERANGE_7BIT = 1,
RB_PARSER_ENC_CODERANGE_VALID = 2,
RB_PARSER_ENC_CODERANGE_BROKEN = 3
};
typedef struct rb_parser_string { typedef struct rb_parser_string {
enum rb_parser_string_coderange_type coderange;
rb_encoding *enc; rb_encoding *enc;
/* Length of the string, not including terminating NUL character. */ /* Length of the string, not including terminating NUL character. */
long len; long len;
/* Pointer to the contents of the string. */ /* Pointer to the contents of the string. */
char ptr[FLEX_ARY_LEN]; char *ptr;
} rb_parser_string_t; } rb_parser_string_t;
/* /*
@ -605,7 +614,7 @@ typedef struct RNode_BACK_REF {
long nd_nth; long nd_nth;
} rb_node_back_ref_t; } rb_node_back_ref_t;
/* RNode_MATCH, RNode_LIT, RNode_STR and RNode_XSTR should be same structure */ /* RNode_MATCH and RNode_LIT should be same structure */
typedef struct RNode_MATCH { typedef struct RNode_MATCH {
NODE node; NODE node;
@ -673,17 +682,18 @@ typedef struct RNode_IMAGINARY {
enum rb_numeric_type type; enum rb_numeric_type type;
} rb_node_imaginary_t; } rb_node_imaginary_t;
/* RNode_STR and RNode_XSTR should be same structure */
typedef struct RNode_STR { typedef struct RNode_STR {
NODE node; NODE node;
VALUE nd_lit; struct rb_parser_string *string;
} rb_node_str_t; } rb_node_str_t;
/* RNode_DSTR, RNode_DXSTR and RNode_DSYM should be same structure */ /* RNode_DSTR, RNode_DXSTR and RNode_DSYM should be same structure */
typedef struct RNode_DSTR { typedef struct RNode_DSTR {
NODE node; NODE node;
VALUE nd_lit; struct rb_parser_string *string;
union { union {
long nd_alen; long nd_alen;
struct RNode *nd_end; /* Second dstr node has this structure. See also RNode_LIST */ struct RNode *nd_end; /* Second dstr node has this structure. See also RNode_LIST */
@ -694,13 +704,13 @@ typedef struct RNode_DSTR {
typedef struct RNode_XSTR { typedef struct RNode_XSTR {
NODE node; NODE node;
VALUE nd_lit; struct rb_parser_string *string;
} rb_node_xstr_t; } rb_node_xstr_t;
typedef struct RNode_DXSTR { typedef struct RNode_DXSTR {
NODE node; NODE node;
VALUE nd_lit; struct rb_parser_string *string;
long nd_alen; long nd_alen;
struct RNode_LIST *nd_next; struct RNode_LIST *nd_next;
} rb_node_dxstr_t; } rb_node_dxstr_t;
@ -714,7 +724,7 @@ typedef struct RNode_EVSTR {
typedef struct RNode_DREGX { typedef struct RNode_DREGX {
NODE node; NODE node;
VALUE nd_lit; struct rb_parser_string *string;
ID nd_cflag; ID nd_cflag;
struct RNode_LIST *nd_next; struct RNode_LIST *nd_next;
} rb_node_dregx_t; } rb_node_dregx_t;
@ -950,7 +960,7 @@ typedef struct RNode_SYM {
typedef struct RNode_DSYM { typedef struct RNode_DSYM {
NODE node; NODE node;
VALUE nd_lit; struct rb_parser_string *string;
long nd_alen; long nd_alen;
struct RNode_LIST *nd_next; struct RNode_LIST *nd_next;
} rb_node_dsym_t; } rb_node_dsym_t;
@ -1329,6 +1339,7 @@ typedef struct rb_parser_config_struct {
int (*enc_isalnum)(OnigCodePoint c, rb_encoding *enc); int (*enc_isalnum)(OnigCodePoint c, rb_encoding *enc);
int (*enc_precise_mbclen)(const char *p, const char *e, rb_encoding *enc); int (*enc_precise_mbclen)(const char *p, const char *e, rb_encoding *enc);
int (*mbclen_charfound_p)(int len); int (*mbclen_charfound_p)(int len);
int (*mbclen_charfound_len)(int len);
const char *(*enc_name)(rb_encoding *enc); const char *(*enc_name)(rb_encoding *enc);
char *(*enc_prev_char)(const char *s, const char *p, const char *e, rb_encoding *enc); char *(*enc_prev_char)(const char *s, const char *p, const char *e, rb_encoding *enc);
rb_encoding* (*enc_get)(VALUE obj); rb_encoding* (*enc_get)(VALUE obj);

View File

@ -235,6 +235,7 @@ struct rb_imemo_tmpbuf_struct {
#define rb_enc_isalnum p->config->enc_isalnum #define rb_enc_isalnum p->config->enc_isalnum
#define rb_enc_precise_mbclen p->config->enc_precise_mbclen #define rb_enc_precise_mbclen p->config->enc_precise_mbclen
#define MBCLEN_CHARFOUND_P p->config->mbclen_charfound_p #define MBCLEN_CHARFOUND_P p->config->mbclen_charfound_p
#define MBCLEN_CHARFOUND_LEN p->config->mbclen_charfound_len
#define rb_enc_name p->config->enc_name #define rb_enc_name p->config->enc_name
#define rb_enc_prev_char p->config->enc_prev_char #define rb_enc_prev_char p->config->enc_prev_char
#define rb_enc_get p->config->enc_get #define rb_enc_get p->config->enc_get