From 9b40f42c22232aaae1b2b17bd6118eacc4c0bee3 Mon Sep 17 00:00:00 2001 From: "S.H" Date: Sat, 27 Jan 2024 17:11:10 +0900 Subject: [PATCH] Introduce `NODE_ENCODING` `__ENCODING__` was managed by `NODE_LIT` with an Encoding object. Introduce `NODE_ENCODING` for 1. `__ENCODING__` is detectable from AST Node. 2. Reduce dependency on Ruby objects in parse.y --- ast.c | 2 ++ compile.c | 15 +++++++++++++++ internal/ruby_parser.h | 1 + node_dump.c | 7 +++++++ parse.y | 28 +++++++++++++++++++++++++--- ruby_parser.c | 6 ++++++ rubyparser.h | 7 +++++++ 7 files changed, 63 insertions(+), 3 deletions(-) diff --git a/ast.c b/ast.c index b61dfbf4cb..579110e20a 100644 --- a/ast.c +++ b/ast.c @@ -706,6 +706,8 @@ node_children(rb_ast_t *ast, const NODE *node) return rb_ary_new_from_args(1, rb_node_line_lineno_val(node)); case NODE_FILE: return rb_ary_new_from_args(1, rb_node_file_path_val(node)); + case NODE_ENCODING: + return rb_ary_new_from_args(1, rb_node_encoding_val(node)); case NODE_ERROR: return rb_ary_new_from_node_args(ast, 0); case NODE_ARGS_AUX: diff --git a/compile.c b/compile.c index 4ae1174caa..eea92b3638 100644 --- a/compile.c +++ b/compile.c @@ -1960,6 +1960,9 @@ iseq_set_arguments_keywords(rb_iseq_t *iseq, LINK_ANCHOR *const optargs, case NODE_IMAGINARY: dv = rb_node_imaginary_literal_val(val_node); break; + case NODE_ENCODING: + dv = rb_node_encoding_val(val_node); + break; case NODE_NIL: dv = Qnil; break; @@ -4512,6 +4515,7 @@ compile_branch_condition(rb_iseq_t *iseq, LINK_ANCHOR *ret, const NODE *cond, case NODE_SYM: case NODE_LINE: case NODE_FILE: + case NODE_ENCODING: case NODE_INTEGER: /* NODE_INTEGER is always true */ case NODE_FLOAT: /* NODE_FLOAT is always true */ case NODE_RATIONAL: /* NODE_RATIONAL is always true */ @@ -4722,6 +4726,7 @@ static_literal_node_p(const NODE *node, const rb_iseq_t *iseq, bool hash_key) case NODE_LIT: case NODE_SYM: case NODE_LINE: + case NODE_ENCODING: case NODE_INTEGER: case NODE_FLOAT: case NODE_RATIONAL: @@ -4760,6 +4765,8 @@ 
static_literal_value(const NODE *node, rb_iseq_t *iseq) return rb_node_sym_string_val(node); case NODE_LINE: return rb_node_line_lineno_val(node); + case NODE_ENCODING: + return rb_node_encoding_val(node); case NODE_FILE: case NODE_STR: if (ISEQ_COMPILE_DATA(iseq)->option->debug_frozen_string_literal || RTEST(ruby_debug)) { @@ -5790,6 +5797,7 @@ defined_expr0(rb_iseq_t *iseq, LINK_ANCHOR *const ret, case NODE_SYM: case NODE_LINE: case NODE_FILE: + case NODE_ENCODING: case NODE_INTEGER: case NODE_FLOAT: case NODE_RATIONAL: @@ -7221,6 +7229,7 @@ iseq_compile_pattern_each(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *c case NODE_RATIONAL: case NODE_IMAGINARY: case NODE_FILE: + case NODE_ENCODING: case NODE_STR: case NODE_XSTR: case NODE_DSTR: @@ -10273,6 +10282,12 @@ iseq_compile_each0(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const no } break; } + case NODE_ENCODING:{ + if (!popped) { + ADD_INSN1(ret, node, putobject, rb_node_encoding_val(node)); + } + break; + } case NODE_INTEGER:{ VALUE lit = rb_node_integer_literal_val(node); debugp_param("integer", lit); diff --git a/internal/ruby_parser.h b/internal/ruby_parser.h index 018a1865de..71e8fdd849 100644 --- a/internal/ruby_parser.h +++ b/internal/ruby_parser.h @@ -79,6 +79,7 @@ RUBY_SYMBOL_EXPORT_END VALUE rb_node_sym_string_val(const NODE *); VALUE rb_node_line_lineno_val(const NODE *); VALUE rb_node_file_path_val(const NODE *); +VALUE rb_node_encoding_val(const NODE *); VALUE rb_node_integer_literal_val(const NODE *); VALUE rb_node_float_literal_val(const NODE *); diff --git a/node_dump.c b/node_dump.c index 61c1b6a14f..a6f9bff4e3 100644 --- a/node_dump.c +++ b/node_dump.c @@ -1149,6 +1149,13 @@ dump_node(VALUE buf, VALUE indent, int comment, const NODE * node) F_VALUE(path, rb_node_file_path_val(node), "path"); return; + case NODE_ENCODING: + ANN("encoding"); + ANN("format: [enc]"); + ANN("example: __ENCODING__"); + F_VALUE(enc, rb_node_encoding_val(node), "enc"); + return; + case NODE_ERROR: 
ANN("Broken input recovered by Error Tolerant mode"); return; diff --git a/parse.y b/parse.y index 8a47a61825..180c298a88 100644 --- a/parse.y +++ b/parse.y @@ -232,6 +232,8 @@ node_cdhash_hash(VALUE a) case NODE_FILE: /* Same with String in rb_iseq_cdhash_hash */ return rb_str_hash(rb_node_file_path_val(node)); + case NODE_ENCODING: + return rb_node_encoding_val(node); default: rb_bug("unexpected node: %s", ruby_node_name(type)); } @@ -1272,6 +1274,7 @@ static rb_node_error_t *rb_node_error_new(struct parser_params *p, const YYLTYPE #define NEW_FNDPTN(pre,a,post,loc) (NODE *)rb_node_fndptn_new(p,pre,a,post,loc) #define NEW_LINE(loc) (NODE *)rb_node_line_new(p,loc) #define NEW_FILE(str,loc) (NODE *)rb_node_file_new(p,str,loc) +#define NEW_ENCODING(loc) (NODE *)rb_node_encoding_new(p,loc) #define NEW_ERROR(loc) (NODE *)rb_node_error_new(p,loc) #endif @@ -6942,6 +6945,7 @@ singleton : var_ref case NODE_SYM: case NODE_LINE: case NODE_FILE: + case NODE_ENCODING: case NODE_INTEGER: case NODE_FLOAT: case NODE_RATIONAL: @@ -12589,6 +12593,15 @@ rb_node_file_new(struct parser_params *p, VALUE str, const YYLTYPE *loc) return n; } +static rb_node_encoding_t * +rb_node_encoding_new(struct parser_params *p, const YYLTYPE *loc) +{ + rb_node_encoding_t *n = NODE_NEWNODE(NODE_ENCODING, rb_node_encoding_t, loc); + n->enc = p->enc; + + return n; +} + static rb_node_cdecl_t * rb_node_cdecl_new(struct parser_params *p, ID nd_vid, NODE *nd_value, NODE *nd_else, const YYLTYPE *loc) { @@ -13195,9 +13208,7 @@ gettable(struct parser_params *p, ID id, const YYLTYPE *loc) case keyword__LINE__: return NEW_LINE(loc); case keyword__ENCODING__: - node = NEW_LIT(rb_enc_from_encoding(p->enc), loc); - RB_OBJ_WRITTEN(p->ast, Qnil, RNODE_LIT(node)->nd_lit); - return node; + return NEW_ENCODING(loc); } switch (id_type(id)) { @@ -14059,6 +14070,8 @@ shareable_literal_value(struct parser_params *p, NODE *node) return rb_node_rational_literal_val(node); case NODE_IMAGINARY: return 
rb_node_imaginary_literal_val(node); + case NODE_ENCODING: + return rb_node_encoding_val(node); case NODE_LIT: return RNODE_LIT(node)->nd_lit; default: @@ -14091,6 +14104,7 @@ shareable_literal_constant(struct parser_params *p, enum shareability shareable, case NODE_FLOAT: case NODE_RATIONAL: case NODE_IMAGINARY: + case NODE_ENCODING: return value; case NODE_DSTR: @@ -14407,6 +14421,7 @@ void_expr(struct parser_params *p, NODE *node) case NODE_SYM: case NODE_LINE: case NODE_FILE: + case NODE_ENCODING: case NODE_INTEGER: case NODE_FLOAT: case NODE_RATIONAL: @@ -14551,6 +14566,7 @@ is_static_content(NODE *node) case NODE_SYM: case NODE_LINE: case NODE_FILE: + case NODE_ENCODING: case NODE_INTEGER: case NODE_FLOAT: case NODE_RATIONAL: @@ -14700,6 +14716,10 @@ cond0(struct parser_params *p, NODE *node, enum cond_type type, const YYLTYPE *l SWITCH_BY_COND_TYPE(type, warning, ""); break; + case NODE_ENCODING: + SWITCH_BY_COND_TYPE(type, warning, ""); + break; + case NODE_INTEGER: case NODE_FLOAT: case NODE_RATIONAL: @@ -15096,6 +15116,8 @@ nd_st_key(struct parser_params *p, NODE *node) return rb_node_sym_string_val(node); case NODE_LINE: return rb_node_line_lineno_val(node); + case NODE_ENCODING: + return rb_node_encoding_val(node); case NODE_FILE: return rb_node_file_path_val(node); default: diff --git a/ruby_parser.c b/ruby_parser.c index 965fe2cc1a..3a6f9ed082 100644 --- a/ruby_parser.c +++ b/ruby_parser.c @@ -1008,3 +1008,9 @@ rb_node_file_path_val(const NODE *node) { return rb_str_new_parser_string(RNODE_FILE(node)->path); } + +VALUE +rb_node_encoding_val(const NODE *node) +{ + return rb_enc_from_encoding(RNODE_ENCODING(node)->enc); +} diff --git a/rubyparser.h b/rubyparser.h index 2bd93bf0b2..e4f06a9052 100644 --- a/rubyparser.h +++ b/rubyparser.h @@ -160,6 +160,7 @@ enum node_type { NODE_ERROR, NODE_LINE, NODE_FILE, + NODE_ENCODING, NODE_RIPPER, NODE_RIPPER_VALUES, NODE_LAST @@ -1005,6 +1006,11 @@ typedef struct RNode_FILE { struct rb_parser_string *path; } 
rb_node_file_t; +typedef struct RNode_ENCODING { + NODE node; + rb_encoding *enc; +} rb_node_encoding_t; + typedef struct RNode_ERROR { NODE node; } rb_node_error_t; @@ -1121,6 +1127,7 @@ typedef struct RNode_ERROR { #define RNODE_FNDPTN(node) ((struct RNode_FNDPTN *)(node)) #define RNODE_LINE(node) ((struct RNode_LINE *)(node)) #define RNODE_FILE(node) ((struct RNode_FILE *)(node)) +#define RNODE_ENCODING(node) ((struct RNode_ENCODING *)(node)) #ifdef RIPPER typedef struct RNode_RIPPER {