Remove ruby object from string nodes

String nodes hold a Ruby string object in `VALUE nd_lit`.
This commit changes it to `struct rb_parser_string *string`
to reduce the dependency on Ruby objects.
These strings are sometimes concatenated with other strings,
so string concatenation functions are needed.
This commit is contained in:
yui-knk 2024-02-03 00:50:02 +09:00 committed by Yuichiro Kaneko
parent f7a407cabd
commit 33c1e082d0
9 changed files with 598 additions and 180 deletions

5
ast.c
View File

@ -555,9 +555,10 @@ node_children(rb_ast_t *ast, const NODE *node)
return rb_ary_new_from_node_args(ast, 2, RNODE_MATCH3(node)->nd_recv, RNODE_MATCH3(node)->nd_value);
case NODE_MATCH:
case NODE_LIT:
return rb_ary_new_from_args(1, RNODE_LIT(node)->nd_lit);
case NODE_STR:
case NODE_XSTR:
return rb_ary_new_from_args(1, RNODE_LIT(node)->nd_lit);
return rb_ary_new_from_args(1, rb_node_str_string_val(node));
case NODE_INTEGER:
return rb_ary_new_from_args(1, rb_node_integer_literal_val(node));
case NODE_FLOAT:
@ -579,7 +580,7 @@ node_children(rb_ast_t *ast, const NODE *node)
head = NEW_CHILD(ast, n->nd_head);
next = NEW_CHILD(ast, n->nd_next);
}
return rb_ary_new_from_args(3, RNODE_DSTR(node)->nd_lit, head, next);
return rb_ary_new_from_args(3, rb_node_dstr_string_val(node), head, next);
}
case NODE_SYM:
return rb_ary_new_from_args(1, rb_node_sym_string_val(node));

View File

@ -838,7 +838,7 @@ get_string_value(const NODE *node)
{
switch (nd_type(node)) {
case NODE_STR:
return RNODE_STR(node)->nd_lit;
return rb_node_str_string_val(node);
case NODE_FILE:
return rb_node_file_path_val(node);
default:
@ -4310,7 +4310,7 @@ static int
compile_dstr_fragments(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, int *cntp)
{
const struct RNode_LIST *list = RNODE_DSTR(node)->nd_next;
VALUE lit = RNODE_DSTR(node)->nd_lit;
VALUE lit = rb_node_dstr_string_val(node);
LINK_ELEMENT *first_lit = 0;
int cnt = 0;
@ -4331,7 +4331,7 @@ compile_dstr_fragments(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *cons
while (list) {
const NODE *const head = list->nd_head;
if (nd_type_p(head, NODE_STR)) {
lit = rb_fstring(RNODE_STR(head)->nd_lit);
lit = rb_fstring(rb_node_str_string_val(head));
ADD_INSN1(ret, head, putobject, lit);
RB_OBJ_WRITTEN(iseq, Qundef, lit);
lit = Qnil;
@ -4370,7 +4370,7 @@ compile_dstr(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node)
{
int cnt;
if (!RNODE_DSTR(node)->nd_next) {
VALUE lit = rb_fstring(RNODE_DSTR(node)->nd_lit);
VALUE lit = rb_fstring(rb_node_dstr_string_val(node));
ADD_INSN1(ret, node, putstring, lit);
RB_OBJ_WRITTEN(iseq, Qundef, lit);
}
@ -4387,14 +4387,13 @@ compile_dregx(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, i
int cnt;
if (!RNODE_DREGX(node)->nd_next) {
VALUE match = RNODE_DREGX(node)->nd_lit;
if (RB_TYPE_P(match, T_REGEXP)) {
if (!popped) {
ADD_INSN1(ret, node, putobject, match);
RB_OBJ_WRITTEN(iseq, Qundef, match);
}
return COMPILE_OK;
if (!popped) {
VALUE src = rb_node_dregx_string_val(node);
VALUE match = rb_reg_compile(src, (int)RNODE_DREGX(node)->nd_cflag, NULL, 0);
ADD_INSN1(ret, node, putobject, match);
RB_OBJ_WRITTEN(iseq, Qundef, match);
}
return COMPILE_OK;
}
CHECK(compile_dstr_fragments(iseq, ret, node, &cnt));
@ -5135,7 +5134,7 @@ rb_node_case_when_optimizable_literal(const NODE *const node)
case NODE_LINE:
return rb_node_line_lineno_val(node);
case NODE_STR:
return rb_fstring(RNODE_STR(node)->nd_lit);
return rb_fstring(rb_node_str_string_val(node));
case NODE_FILE:
return rb_fstring(rb_node_file_path_val(node));
}
@ -10364,7 +10363,7 @@ iseq_compile_each0(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const no
}
case NODE_XSTR:{
ADD_CALL_RECEIVER(ret, node);
VALUE str = rb_fstring(RNODE_XSTR(node)->nd_lit);
VALUE str = rb_fstring(rb_node_str_string_val(node));
ADD_INSN1(ret, node, putobject, str);
RB_OBJ_WRITTEN(iseq, Qundef, str);
ADD_CALL(ret, node, idBackquote, INT2FIX(1));

View File

@ -76,7 +76,10 @@ RUBY_SYMBOL_EXPORT_BEGIN
VALUE rb_str_new_parser_string(rb_parser_string_t *str);
RUBY_SYMBOL_EXPORT_END
VALUE rb_node_str_string_val(const NODE *);
VALUE rb_node_sym_string_val(const NODE *);
VALUE rb_node_dstr_string_val(const NODE *);
VALUE rb_node_dregx_string_val(const NODE *);
VALUE rb_node_line_lineno_val(const NODE *);
VALUE rb_node_file_path_val(const NODE *);
VALUE rb_node_encoding_val(const NODE *);

38
node.c
View File

@ -172,6 +172,8 @@ struct rb_ast_local_table_link {
/*
 * Release a parser string: first its character buffer, then the
 * struct itself.  A NULL `str` is accepted and nothing is freed.
 * `ast` is not consulted by this function.
 */
static void
parser_string_free(rb_ast_t *ast, rb_parser_string_t *str)
{
    if (str) {
        /* The buffer must go first; it is reached through `str`. */
        xfree(str->ptr);
        xfree(str);
    }
}
@ -179,9 +181,27 @@ static void
free_ast_value(rb_ast_t *ast, void *ctx, NODE *node)
{
switch (nd_type(node)) {
case NODE_STR:
parser_string_free(ast, RNODE_STR(node)->string);
break;
case NODE_DSTR:
parser_string_free(ast, RNODE_DSTR(node)->string);
break;
case NODE_XSTR:
parser_string_free(ast, RNODE_XSTR(node)->string);
break;
case NODE_DXSTR:
parser_string_free(ast, RNODE_DXSTR(node)->string);
break;
case NODE_SYM:
parser_string_free(ast, RNODE_SYM(node)->string);
break;
case NODE_DSYM:
parser_string_free(ast, RNODE_DSYM(node)->string);
break;
case NODE_DREGX:
parser_string_free(ast, RNODE_DREGX(node)->string);
break;
case NODE_FILE:
parser_string_free(ast, RNODE_FILE(node)->path);
break;
@ -251,12 +271,6 @@ nodetype_markable_p(enum node_type type)
switch (type) {
case NODE_MATCH:
case NODE_LIT:
case NODE_STR:
case NODE_XSTR:
case NODE_DSTR:
case NODE_DXSTR:
case NODE_DREGX:
case NODE_DSYM:
return true;
default:
return false;
@ -363,12 +377,6 @@ mark_ast_value(rb_ast_t *ast, void *ctx, NODE *node)
switch (nd_type(node)) {
case NODE_MATCH:
case NODE_LIT:
case NODE_STR:
case NODE_XSTR:
case NODE_DSTR:
case NODE_DXSTR:
case NODE_DREGX:
case NODE_DSYM:
rb_gc_mark_movable(RNODE_LIT(node)->nd_lit);
break;
default:
@ -386,12 +394,6 @@ update_ast_value(rb_ast_t *ast, void *ctx, NODE *node)
switch (nd_type(node)) {
case NODE_MATCH:
case NODE_LIT:
case NODE_STR:
case NODE_XSTR:
case NODE_DSTR:
case NODE_DXSTR:
case NODE_DREGX:
case NODE_DSYM:
RNODE_LIT(node)->nd_lit = rb_gc_location(RNODE_LIT(node)->nd_lit);
break;
default:

View File

@ -707,18 +707,19 @@ dump_node(VALUE buf, VALUE indent, int comment, const NODE * node)
ANN("literal");
ANN("format: [nd_lit]");
ANN("example: :sym, /foo/");
goto lit;
F_LIT(nd_lit, RNODE_LIT, "literal");
return;
case NODE_STR:
ANN("string literal");
ANN("format: [nd_lit]");
ANN("example: 'foo'");
goto lit;
goto str;
case NODE_XSTR:
ANN("xstring literal");
ANN("format: [nd_lit]");
ANN("example: `foo`");
lit:
F_LIT(nd_lit, RNODE_LIT, "literal");
str:
F_VALUE(string, rb_node_str_string_val(node), "literal");
return;
case NODE_INTEGER:
@ -777,7 +778,7 @@ dump_node(VALUE buf, VALUE indent, int comment, const NODE * node)
ANN("format: [nd_lit]");
ANN("example: :\"foo#{ bar }baz\"");
dlit:
F_LIT(nd_lit, RNODE_DSTR, "preceding string");
F_VALUE(string, rb_node_dstr_string_val(node), "preceding string");
if (!RNODE_DSTR(node)->nd_next) return;
F_NODE(nd_next->nd_head, RNODE_DSTR, "interpolation");
LAST_NODE;

640
parse.y

File diff suppressed because it is too large Load Diff

View File

@ -166,6 +166,12 @@ mbclen_charfound_p(int len)
return MBCLEN_CHARFOUND_P(len);
}
/*
 * Function form of the MBCLEN_CHARFOUND_LEN macro, so the operation can be
 * referenced through a function pointer.  Takes the value to decode in `len`
 * and returns whatever the macro yields for it.
 */
static int
mbclen_charfound_len(int len)
{
    const int found_len = MBCLEN_CHARFOUND_LEN(len);

    return found_len;
}
static const char *
enc_name(void *enc)
{
@ -598,6 +604,7 @@ static const rb_parser_config_t rb_global_parser_config = {
.enc_isalnum = enc_isalnum,
.enc_precise_mbclen = enc_precise_mbclen,
.mbclen_charfound_p = mbclen_charfound_p,
.mbclen_charfound_len = mbclen_charfound_len,
.enc_name = enc_name,
.enc_prev_char = enc_prev_char,
.enc_get = enc_get,
@ -988,6 +995,13 @@ rb_node_imaginary_literal_val(const NODE *n)
return lit;
}
/*
 * Build a Ruby VALUE from the parser string stored on a string node
 * (accessed via RNODE_STR) by handing it to rb_str_new_parser_string().
 */
VALUE
rb_node_str_string_val(const NODE *node)
{
    return rb_str_new_parser_string(RNODE_STR(node)->string);
}
VALUE
rb_node_sym_string_val(const NODE *node)
{
@ -995,6 +1009,20 @@ rb_node_sym_string_val(const NODE *node)
return ID2SYM(rb_intern3(str->ptr, str->len, str->enc));
}
/*
 * Build a Ruby VALUE from the parser string stored on a dstr node
 * (accessed via RNODE_DSTR).  Returns Qnil when the node carries no
 * parser string; otherwise converts it with rb_str_new_parser_string().
 */
VALUE
rb_node_dstr_string_val(const NODE *node)
{
    rb_parser_string_t *leading = RNODE_DSTR(node)->string;

    if (!leading) {
        return Qnil;
    }
    return rb_str_new_parser_string(leading);
}
/*
 * Build a Ruby VALUE from the parser string stored on a dregx node
 * (accessed via RNODE_DREGX) by handing it to rb_str_new_parser_string().
 * The stored pointer is passed through without a NULL check.
 */
VALUE
rb_node_dregx_string_val(const NODE *node)
{
    return rb_str_new_parser_string(RNODE_DREGX(node)->string);
}
VALUE
rb_node_line_lineno_val(const NODE *node)
{

View File

@ -37,12 +37,21 @@
/*
* Parser String
*/
/*
 * Code range classification recorded on a parser string.
 * The numeric values are assigned explicitly (0..3).
 * NOTE(review): the names suggest these mirror Ruby's ENC_CODERANGE_*
 * states for String objects -- confirm before relying on that.
 */
enum rb_parser_string_coderange_type {
/** The object's coderange is unclear yet. */
RB_PARSER_ENC_CODERANGE_UNKNOWN = 0,
/* presumably: every byte is 7-bit ASCII -- TODO confirm */
RB_PARSER_ENC_CODERANGE_7BIT = 1,
/* presumably: valid in its encoding but not purely 7-bit -- TODO confirm */
RB_PARSER_ENC_CODERANGE_VALID = 2,
/* presumably: contains byte sequences invalid in its encoding -- TODO confirm */
RB_PARSER_ENC_CODERANGE_BROKEN = 3
};
typedef struct rb_parser_string {
enum rb_parser_string_coderange_type coderange;
rb_encoding *enc;
/* Length of the string, not including terminating NUL character. */
long len;
/* Pointer to the contents of the string. */
char ptr[FLEX_ARY_LEN];
char *ptr;
} rb_parser_string_t;
/*
@ -605,7 +614,7 @@ typedef struct RNode_BACK_REF {
long nd_nth;
} rb_node_back_ref_t;
/* RNode_MATCH, RNode_LIT, RNode_STR and RNode_XSTR should be same structure */
/* RNode_MATCH and RNode_LIT should be same structure */
typedef struct RNode_MATCH {
NODE node;
@ -673,17 +682,18 @@ typedef struct RNode_IMAGINARY {
enum rb_numeric_type type;
} rb_node_imaginary_t;
/* RNode_STR and RNode_XSTR should be same structure */
typedef struct RNode_STR {
NODE node;
VALUE nd_lit;
struct rb_parser_string *string;
} rb_node_str_t;
/* RNode_DSTR, RNode_DXSTR and RNode_DSYM should be same structure */
typedef struct RNode_DSTR {
NODE node;
VALUE nd_lit;
struct rb_parser_string *string;
union {
long nd_alen;
struct RNode *nd_end; /* Second dstr node has this structure. See also RNode_LIST */
@ -694,13 +704,13 @@ typedef struct RNode_DSTR {
typedef struct RNode_XSTR {
NODE node;
VALUE nd_lit;
struct rb_parser_string *string;
} rb_node_xstr_t;
typedef struct RNode_DXSTR {
NODE node;
VALUE nd_lit;
struct rb_parser_string *string;
long nd_alen;
struct RNode_LIST *nd_next;
} rb_node_dxstr_t;
@ -714,7 +724,7 @@ typedef struct RNode_EVSTR {
typedef struct RNode_DREGX {
NODE node;
VALUE nd_lit;
struct rb_parser_string *string;
ID nd_cflag;
struct RNode_LIST *nd_next;
} rb_node_dregx_t;
@ -950,7 +960,7 @@ typedef struct RNode_SYM {
typedef struct RNode_DSYM {
NODE node;
VALUE nd_lit;
struct rb_parser_string *string;
long nd_alen;
struct RNode_LIST *nd_next;
} rb_node_dsym_t;
@ -1329,6 +1339,7 @@ typedef struct rb_parser_config_struct {
int (*enc_isalnum)(OnigCodePoint c, rb_encoding *enc);
int (*enc_precise_mbclen)(const char *p, const char *e, rb_encoding *enc);
int (*mbclen_charfound_p)(int len);
int (*mbclen_charfound_len)(int len);
const char *(*enc_name)(rb_encoding *enc);
char *(*enc_prev_char)(const char *s, const char *p, const char *e, rb_encoding *enc);
rb_encoding* (*enc_get)(VALUE obj);

View File

@ -235,6 +235,7 @@ struct rb_imemo_tmpbuf_struct {
#define rb_enc_isalnum p->config->enc_isalnum
#define rb_enc_precise_mbclen p->config->enc_precise_mbclen
#define MBCLEN_CHARFOUND_P p->config->mbclen_charfound_p
#define MBCLEN_CHARFOUND_LEN p->config->mbclen_charfound_len
#define rb_enc_name p->config->enc_name
#define rb_enc_prev_char p->config->enc_prev_char
#define rb_enc_get p->config->enc_get