ruby/ruby_parser.c
Jean Boussier 9e9f1d9301 Precompute embedded string literals hash code
With embedded strings we often have some space left in the slot, which
we can use to store the string Hash code.

It's probably only worth it for string literals, as they are the ones
likely to be used as hash keys.

We chose to store the Hash code right after the string terminator so as to
make it easy and fast to compute, and to avoid requiring one more union in RString.

```
compare-ruby: ruby 3.4.0dev (2024-04-22T06:32:21Z main f77618c1fa) [arm64-darwin23]
built-ruby: ruby 3.4.0dev (2024-04-22T10:13:03Z interned-string-ha.. 8a1a32331b) [arm64-darwin23]
last_commit=Precompute embedded string literals hash code

|            |compare-ruby|built-ruby|
|:-----------|-----------:|---------:|
|symbol      |     39.275M|   39.753M|
|            |           -|     1.01x|
|dyn_symbol  |     37.348M|   37.704M|
|            |           -|     1.01x|
|small_lit   |     29.514M|   33.948M|
|            |           -|     1.15x|
|frozen_lit  |     27.180M|   33.056M|
|            |           -|     1.22x|
|iseq_lit    |     27.391M|   32.242M|
|            |           -|     1.18x|
```

Co-Authored-By: Étienne Barrié <etienne.barrie@gmail.com>
2024-05-28 07:32:41 +02:00

1168 lines
28 KiB
C

/* This is a wrapper for parse.y */
#include "internal/parse.h"
#include "internal/re.h"
#include "internal/ruby_parser.h"
#include "node.h"
#include "rubyparser.h"
#include "internal/error.h"
#ifdef UNIVERSAL_PARSER
#include "internal.h"
#include "internal/array.h"
#include "internal/bignum.h"
#include "internal/compile.h"
#include "internal/complex.h"
#include "internal/encoding.h"
#include "internal/gc.h"
#include "internal/hash.h"
#include "internal/io.h"
#include "internal/rational.h"
#include "internal/re.h"
#include "internal/string.h"
#include "internal/symbol.h"
#include "internal/thread.h"
#include "ruby/ractor.h"
#include "ruby/ruby.h"
#include "ruby/util.h"
#include "internal.h"
#include "vm_core.h"
#include "symbol.h"
#define parser_encoding const void
/* Parser-config callback (.is_ascii_string): forwards to is_ascii_string(). */
static int
is_ascii_string2(VALUE str)
{
    const int ascii_only = is_ascii_string(str);

    return ascii_only;
}
/*
 * Parser-config callback (.syntax_error_append): hands every argument,
 * including the va_list, straight to rb_syntax_error_append().
 */
RBIMPL_ATTR_FORMAT(RBIMPL_PRINTF_FORMAT, 6, 0)
static VALUE
syntax_error_append(VALUE exc, VALUE file, int line, int column,
                    parser_encoding *enc, const char *fmt, va_list args)
{
    VALUE appended = rb_syntax_error_append(exc, file, line, column, enc, fmt, args);

    return appended;
}
/*
 * Parser-config callback (.local_defined): treats the opaque pointer as an
 * iseq and asks rb_local_defined() about id.
 */
static int
local_defined(ID id, const void *p)
{
    const rb_iseq_t *iseq = (const rb_iseq_t *)p;

    return rb_local_defined(id, iseq);
}
/*
 * Parser-config callback (.dvar_defined): treats the opaque pointer as an
 * iseq and asks rb_dvar_defined() about id.
 */
static int
dvar_defined(ID id, const void *p)
{
    const rb_iseq_t *iseq = (const rb_iseq_t *)p;

    return rb_dvar_defined(id, iseq);
}
/* Parser-config callback (.is_usascii_enc): forwards to rb_is_usascii_enc(). */
static int
is_usascii_enc(parser_encoding *enc)
{
    const int usascii = rb_is_usascii_enc(enc);

    return usascii;
}
/* Parser-config callback (.is_local_id): function form of is_local_id(). */
static int
is_local_id2(ID id)
{
    const int local = is_local_id(id);

    return local;
}
/* Parser-config callback (.is_attrset_id): function form of is_attrset_id(). */
static int
is_attrset_id2(ID id)
{
    const int attrset = is_attrset_id(id);

    return attrset;
}
/* Parser-config callback (.is_notop_id): function form of is_notop_id(). */
static int
is_notop_id2(ID id)
{
    const int notop = is_notop_id(id);

    return notop;
}
/* Parser-config callback (.enc_str_new): forwards to rb_enc_str_new(). */
static VALUE
enc_str_new(const char *ptr, long len, parser_encoding *enc)
{
    VALUE str = rb_enc_str_new(ptr, len, enc);

    return str;
}
/* Parser-config callback (.enc_isalnum): forwards to rb_enc_isalnum(). */
static int
enc_isalnum(OnigCodePoint c, parser_encoding *enc)
{
    const int alnum = rb_enc_isalnum(c, enc);

    return alnum;
}
/* Parser-config callback (.enc_precise_mbclen): forwards to rb_enc_precise_mbclen(). */
static int
enc_precise_mbclen(const char *p, const char *e, parser_encoding *enc)
{
    const int mbclen = rb_enc_precise_mbclen(p, e, enc);

    return mbclen;
}
/* Parser-config callback (.mbclen_charfound_p): function form of MBCLEN_CHARFOUND_P(). */
static int
mbclen_charfound_p(int len)
{
    const int found = MBCLEN_CHARFOUND_P(len);

    return found;
}
/* Parser-config callback (.mbclen_charfound_len): function form of MBCLEN_CHARFOUND_LEN(). */
static int
mbclen_charfound_len(int len)
{
    const int found_len = MBCLEN_CHARFOUND_LEN(len);

    return found_len;
}
/* Parser-config callback (.enc_name): forwards to rb_enc_name(). */
static const char *
enc_name(parser_encoding *enc)
{
    const char *name = rb_enc_name(enc);

    return name;
}
/* Parser-config callback (.enc_prev_char): forwards to rb_enc_prev_char(). */
static char *
enc_prev_char(const char *s, const char *p, const char *e, parser_encoding *enc)
{
    char *prev = rb_enc_prev_char(s, p, e, enc);

    return prev;
}
/* Parser-config callback (.enc_get): forwards to rb_enc_get(). */
static parser_encoding *
enc_get(VALUE obj)
{
    parser_encoding *enc = rb_enc_get(obj);

    return enc;
}
/* Parser-config callback (.enc_asciicompat): forwards to rb_enc_asciicompat(). */
static int
enc_asciicompat(parser_encoding *enc)
{
    const int compatible = rb_enc_asciicompat(enc);

    return compatible;
}
/* Parser-config callback (.utf8_encoding): forwards to rb_utf8_encoding(). */
static parser_encoding *
utf8_encoding(void)
{
    parser_encoding *enc = rb_utf8_encoding();

    return enc;
}
/* Parser-config callback (.enc_associate): forwards to rb_enc_associate(). */
static VALUE
enc_associate(VALUE obj, parser_encoding *enc)
{
    VALUE associated = rb_enc_associate(obj, enc);

    return associated;
}
/* Parser-config callback (.ascii8bit_encoding): forwards to rb_ascii8bit_encoding(). */
static parser_encoding *
ascii8bit_encoding(void)
{
    parser_encoding *enc = rb_ascii8bit_encoding();

    return enc;
}
/* Parser-config callback (.enc_codelen): forwards to rb_enc_codelen(). */
static int
enc_codelen(int c, parser_encoding *enc)
{
    const int codelen = rb_enc_codelen(c, enc);

    return codelen;
}
/* Parser-config callback (.enc_mbcput): forwards to rb_enc_mbcput(). */
static int
enc_mbcput(unsigned int c, void *buf, parser_encoding *enc)
{
    const int written = rb_enc_mbcput(c, buf, enc);

    return written;
}
/* Parser-config callback (.enc_mbclen): forwards to rb_enc_mbclen(). */
static int
enc_mbclen(const char *p, const char *e, parser_encoding *enc)
{
    const int mbclen = rb_enc_mbclen(p, e, enc);

    return mbclen;
}
/* Parser-config callback (.enc_from_index): forwards to rb_enc_from_index(). */
static parser_encoding *
enc_from_index(int idx)
{
    parser_encoding *enc = rb_enc_from_index(idx);

    return enc;
}
/* Parser-config callback (.enc_isspace): forwards to rb_enc_isspace(). */
static int
enc_isspace(OnigCodePoint c, parser_encoding *enc)
{
    const int space = rb_enc_isspace(c, enc);

    return space;
}
/* Parser-config callback (.intern3): forwards to rb_intern3(). */
static ID
intern3(const char *name, long len, parser_encoding *enc)
{
    const ID id = rb_intern3(name, len, enc);

    return id;
}
/* Parser-config callback (.usascii_encoding): forwards to rb_usascii_encoding(). */
static parser_encoding *
usascii_encoding(void)
{
    parser_encoding *enc = rb_usascii_encoding();

    return enc;
}
/* Parser-config callback (.enc_symname_type): forwards to rb_enc_symname_type(). */
static int
enc_symname_type(const char *name, long len, parser_encoding *enc, unsigned int allowed_attrset)
{
    const int type = rb_enc_symname_type(name, len, enc, allowed_attrset);

    return type;
}
/*
 * State handed to reg_named_capture_assign_iter() through the void *
 * argument of onig_foreach_name().
 */
typedef struct {
    struct parser_params *parser;   /* parser the capture assignments belong to */
    rb_encoding *enc;               /* encoding obtained from the regexp via rb_enc_get() */
    NODE *succ_block;               /* starts as 0; its address is passed to the iter impl */
    const rb_code_location_t *loc;  /* location forwarded to the iter impl */
} reg_named_capture_assign_t;
/*
 * onig_foreach_name() callback: unpacks the reg_named_capture_assign_t
 * context and passes the capture name (pointer + byte length) on to
 * rb_reg_named_capture_assign_iter_impl().  back_num, back_refs and regex
 * are not used.
 */
static int
reg_named_capture_assign_iter(const OnigUChar *name, const OnigUChar *name_end,
          int back_num, int *back_refs, OnigRegex regex, void *arg0)
{
    reg_named_capture_assign_t *ctx = (reg_named_capture_assign_t *)arg0;
    const char *capture = (const char *)name;
    long capture_len = name_end - name;

    return rb_reg_named_capture_assign_iter_impl(ctx->parser, capture, capture_len,
                                                 ctx->enc, &ctx->succ_block, ctx->loc);
}
/*
 * Parser-config callback (.reg_named_capture_assign): walks the named
 * captures of regexp with onig_foreach_name().  Returns 0 when the
 * iteration left succ_block unset, otherwise the nd_next of that block.
 */
static NODE *
reg_named_capture_assign(struct parser_params* p, VALUE regexp, const rb_code_location_t *loc)
{
    reg_named_capture_assign_t ctx;

    ctx.parser = p;
    ctx.enc = rb_enc_get(regexp);
    ctx.succ_block = 0;
    ctx.loc = loc;

    onig_foreach_name(RREGEXP_PTR(regexp), reg_named_capture_assign_iter, &ctx);

    if (ctx.succ_block) {
        return RNODE_BLOCK(ctx.succ_block)->nd_next;
    }
    return 0;
}
/* Parser-config callback (.rtest): function form of RB_TEST(), as an int. */
static int
rtest(VALUE obj)
{
    const int truthy = (int)RB_TEST(obj);

    return truthy;
}
/* Parser-config callback (.nil_p): function form of NIL_P(), as an int. */
static int
nil_p(VALUE obj)
{
    const int is_nil = (int)NIL_P(obj);

    return is_nil;
}
/*
 * Parser-config callback (.syntax_error_new): instantiates rb_eSyntaxError
 * with no constructor arguments.
 */
static VALUE
syntax_error_new(void)
{
    VALUE exc = rb_class_new_instance(0, 0, rb_eSyntaxError);

    return exc;
}
/*
 * Parser-config callback (.rb_memmove): memmove() of t * n bytes, where the
 * product is computed by rbimpl_size_mul_or_raise().
 */
static void *
memmove2(void *dest, const void *src, size_t t, size_t n)
{
    const size_t nbytes = rbimpl_size_mul_or_raise(t, n);

    return memmove(dest, src, nbytes);
}
/*
 * Parser-config callback (.nonempty_memcpy): ruby_nonempty_memcpy() of
 * t * n bytes, where the product is computed by rbimpl_size_mul_or_raise().
 */
static void *
nonempty_memcpy(void *dest, const void *src, size_t t, size_t n)
{
    const size_t nbytes = rbimpl_size_mul_or_raise(t, n);

    return ruby_nonempty_memcpy(dest, src, nbytes);
}
/* Parser-config callback (.verbose): returns the current value of ruby_verbose. */
static VALUE
ruby_verbose2(void)
{
    VALUE verbose = ruby_verbose;

    return verbose;
}
/* Parser-config callback (.errno_ptr): forwards to rb_errno_ptr(). */
static int *
rb_errno_ptr2(void)
{
    int *errno_slot = rb_errno_ptr();

    return errno_slot;
}
/*
 * Parser-config callback (.zalloc): allocates a single element of elemsiz
 * bytes through ruby_xcalloc().
 */
static void *
zalloc(size_t elemsiz)
{
    void *block = ruby_xcalloc(1, elemsiz);

    return block;
}
/*
 * Parser-config callback (.gc_guard): applies RB_GC_GUARD to this
 * function's own copy of obj.
 */
static void
gc_guard(VALUE obj)
{
    RB_GC_GUARD(obj);
}
/* Parser-config callback (.eArgError): returns the rb_eArgError class object. */
static VALUE
arg_error(void)
{
    VALUE klass = rb_eArgError;

    return klass;
}
/*
 * Parser-config callback (.static_id2sym): builds a symbol VALUE directly
 * from id by shifting it left by RUBY_SPECIAL_SHIFT and OR-ing in
 * SYMBOL_FLAG.
 */
static VALUE
static_id2sym(ID id)
{
    const VALUE shifted = (VALUE)(id) << RUBY_SPECIAL_SHIFT;

    return (shifted | SYMBOL_FLAG);
}
/*
 * Parser-config callback (.str_coderange_scan_restartable): forwards to
 * rb_str_coderange_scan_restartable().
 */
static long
str_coderange_scan_restartable(const char *s, const char *e, parser_encoding *enc, int *cr)
{
    const long scanned = rb_str_coderange_scan_restartable(s, e, enc, cr);

    return scanned;
}
/* Parser-config callback (.enc_mbminlen): forwards to rb_enc_mbminlen(). */
static int
enc_mbminlen(parser_encoding *enc)
{
    const int minlen = rb_enc_mbminlen(enc);

    return minlen;
}
/* Parser-config callback (.enc_isascii): forwards to rb_enc_isascii(). */
static bool
enc_isascii(OnigCodePoint c, parser_encoding *enc)
{
    const bool ascii = rb_enc_isascii(c, enc);

    return ascii;
}
/*
 * Parser-config callback (.enc_mbc_to_codepoint): reinterprets the char
 * range [p, e) as OnigUChar pointers and decodes it with
 * ONIGENC_MBC_TO_CODE().
 */
static OnigCodePoint
enc_mbc_to_codepoint(const char *p, const char *e, parser_encoding *enc)
{
    const OnigUChar *first = RBIMPL_CAST((const OnigUChar *)p);
    const OnigUChar *limit = RBIMPL_CAST((const OnigUChar *)e);
    OnigCodePoint code = ONIGENC_MBC_TO_CODE((rb_encoding *)enc, first, limit);

    return code;
}
extern VALUE rb_eArgError;
/*
 * The callback table handed to the parser when UNIVERSAL_PARSER is defined
 * (see rb_ruby_parser_config() and rb_parser_params_new()).  Each member is
 * either a runtime function used directly or one of the static adapter
 * functions defined earlier in this file.
 */
static const rb_parser_config_t rb_global_parser_config = {
    /* Memory allocation */
    .malloc = ruby_xmalloc,
    .calloc = ruby_xcalloc,
    .realloc = ruby_xrealloc,
    .free = ruby_xfree,
    .alloc_n = ruby_xmalloc2,
    .alloc = ruby_xmalloc,
    .realloc_n = ruby_xrealloc2,
    .zalloc = zalloc,
    .rb_memmove = memmove2,
    .nonempty_memcpy = nonempty_memcpy,
    .xmalloc_mul_add = rb_xmalloc_mul_add,
    .compile_callback = rb_suppress_tracing,
    .reg_named_capture_assign = reg_named_capture_assign,
    .attr_get = rb_attr_get,
    /* Arrays */
    .ary_new = rb_ary_new,
    .ary_push = rb_ary_push,
    .ary_new_from_args = rb_ary_new_from_args,
    .ary_unshift = rb_ary_unshift,
    /* IDs and symbols */
    .make_temporary_id = rb_make_temporary_id,
    .is_local_id = is_local_id2,
    .is_attrset_id = is_attrset_id2,
    .is_global_name_punct = is_global_name_punct,
    .id_type = id_type,
    .id_attrset = rb_id_attrset,
    .intern = rb_intern,
    .intern2 = rb_intern2,
    .intern3 = intern3,
    .intern_str = rb_intern_str,
    .is_notop_id = is_notop_id2,
    .enc_symname_type = enc_symname_type,
    .id2name = rb_id2name,
    .id2str = rb_id2str,
    .id2sym = rb_id2sym,
    .sym2id = rb_sym2id,
    /* Strings */
    .str_catf = rb_str_catf,
    .str_cat_cstr = rb_str_cat_cstr,
    .str_modify = rb_str_modify,
    .str_set_len = rb_str_set_len,
    .str_cat = rb_str_cat,
    .str_resize = rb_str_resize,
    .str_new = rb_str_new,
    .str_new_cstr = rb_str_new_cstr,
    .str_to_interned_str = rb_str_to_interned_str,
    .is_ascii_string = is_ascii_string2,
    .enc_str_new = enc_str_new,
    .str_vcatf = rb_str_vcatf,
    .string_value_cstr = rb_string_value_cstr,
    .rb_sprintf = rb_sprintf,
    .rstring_ptr = RSTRING_PTR,
    .rstring_end = RSTRING_END,
    .rstring_len = RSTRING_LEN,
    .obj_as_string = rb_obj_as_string,
    .int2num = rb_int2num_inline,
    /* IO and debug output */
    .stderr_tty_p = rb_stderr_tty_p,
    .write_error_str = rb_write_error_str,
    .io_write = rb_io_write,
    .io_flush = rb_io_flush,
    .io_puts = rb_io_puts,
    .debug_output_stdout = rb_ractor_stdout,
    .debug_output_stderr = rb_ractor_stderr,
    /* Encodings */
    .is_usascii_enc = is_usascii_enc,
    .enc_isalnum = enc_isalnum,
    .enc_precise_mbclen = enc_precise_mbclen,
    .mbclen_charfound_p = mbclen_charfound_p,
    .mbclen_charfound_len = mbclen_charfound_len,
    .enc_name = enc_name,
    .enc_prev_char = enc_prev_char,
    .enc_get = enc_get,
    .enc_asciicompat = enc_asciicompat,
    .utf8_encoding = utf8_encoding,
    .enc_associate = enc_associate,
    .ascii8bit_encoding = ascii8bit_encoding,
    .enc_codelen = enc_codelen,
    .enc_mbcput = enc_mbcput,
    .enc_mbclen = enc_mbclen,
    .enc_find_index = rb_enc_find_index,
    .enc_from_index = enc_from_index,
    .enc_isspace = enc_isspace,
    .enc_coderange_7bit = ENC_CODERANGE_7BIT,
    .enc_coderange_unknown = ENC_CODERANGE_UNKNOWN,
    .usascii_encoding = usascii_encoding,
    .enc_coderange_broken = ENC_CODERANGE_BROKEN,
    .enc_mbminlen = enc_mbminlen,
    .enc_isascii = enc_isascii,
    .enc_mbc_to_codepoint = enc_mbc_to_codepoint,
    /* Local / dynamic variable lookup */
    .local_defined = local_defined,
    .dvar_defined = dvar_defined,
    /* Errors and exceptions */
    .syntax_error_append = syntax_error_append,
    .raise = rb_raise,
    .syntax_error_new = syntax_error_new,
    .errinfo = rb_errinfo,
    .set_errinfo = rb_set_errinfo,
    .exc_raise = rb_exc_raise,
    .make_exception = rb_make_exception,
    /* Sized deallocation and GC */
    .sized_xfree = ruby_sized_xfree,
    .sized_realloc_n = ruby_sized_realloc_n,
    .gc_guard = gc_guard,
    .gc_mark = rb_gc_mark,
    /* Regexp */
    .reg_compile = rb_reg_compile,
    .reg_check_preprocess = rb_reg_check_preprocess,
    .memcicmp = rb_memcicmp,
    /* Warnings and diagnostics */
    .compile_warn = rb_compile_warn,
    .compile_warning = rb_compile_warning,
    .bug = rb_bug,
    .fatal = rb_fatal,
    .verbose = ruby_verbose2,
    .errno_ptr = rb_errno_ptr2,
    .make_backtrace = rb_make_backtrace,
    /* Numeric scanning */
    .scan_hex = ruby_scan_hex,
    .scan_oct = ruby_scan_oct,
    .scan_digits = ruby_scan_digits,
    .strtod = ruby_strtod,
    /* Truthiness helpers and constants */
    .rtest = rtest,
    .nil_p = nil_p,
    .qnil = Qnil,
    .qfalse = Qfalse,
    .eArgError = arg_error,
    .long2int = rb_long2int,
    /* For Ripper */
    .static_id2sym = static_id2sym,
    .str_coderange_scan_restartable = str_coderange_scan_restartable,
};
#endif
/*
 * Kind of input a struct ruby_parser is reading from.  parser_mark() uses
 * it to decide which member of the data union holds a VALUE to mark.
 */
enum lex_type {
    lex_type_str,      /* set by parser_compile_string0(); data.lex_str is in use */
    lex_type_io,       /* set by parser_compile_file_path(); data.lex_io is in use */
    lex_type_array,    /* set by parser_compile_array(); data.lex_array is in use */
    lex_type_generic,  /* set by parser_compile_generic(); nothing is marked */
};
/*
 * Payload of the "parser" typed-data object: the underlying parser state
 * plus the input source currently being read.
 */
struct ruby_parser {
    rb_parser_t *parser_params;  /* marked and freed via rb_ruby_parser_mark/free */
    enum lex_type type;          /* selects the active member of data */
    union {
        struct lex_pointer_string lex_str;  /* source string and read offset */
        struct {
            VALUE file;
        } lex_io;                           /* IO object lines are read from */
        struct {
            VALUE ary;
        } lex_array;                        /* array of source lines */
    } data;
};
static void
parser_mark(void *ptr)
{
struct ruby_parser *parser = (struct ruby_parser*)ptr;
rb_ruby_parser_mark(parser->parser_params);
switch (parser->type) {
case lex_type_str:
rb_gc_mark(parser->data.lex_str.str);
break;
case lex_type_io:
rb_gc_mark(parser->data.lex_io.file);
break;
case lex_type_array:
rb_gc_mark(parser->data.lex_array.ary);
break;
case lex_type_generic:
/* noop. Caller of rb_parser_compile_generic should mark the objects. */
break;
}
}
/*
 * Free function of the "parser" typed data: releases the parser state
 * first, then the wrapper struct itself.
 */
static void
parser_free(void *ptr)
{
    struct ruby_parser *self = ptr;

    rb_ruby_parser_free(self->parser_params);
    xfree(self);
}
static size_t
parser_memsize(const void *ptr)
{
struct ruby_parser *parser = (struct ruby_parser*)ptr;
return rb_ruby_parser_memsize(parser->parser_params);
}
/* Typed-data descriptor for the objects created by rb_parser_new(). */
static const rb_data_type_t ruby_parser_data_type = {
    "parser",
    {
        parser_mark,     /* mark */
        parser_free,     /* free */
        parser_memsize,  /* memsize */
    },
    0, 0, RUBY_TYPED_FREE_IMMEDIATELY
};
#ifdef UNIVERSAL_PARSER
/* Returns the address of the file-level rb_global_parser_config table. */
const rb_parser_config_t *
rb_ruby_parser_config(void)
{
    const rb_parser_config_t *config = &rb_global_parser_config;

    return config;
}
/*
 * UNIVERSAL_PARSER variant: creates parser state configured with
 * rb_global_parser_config.
 */
rb_parser_t *
rb_parser_params_new(void)
{
    rb_parser_t *params = rb_ruby_parser_new(&rb_global_parser_config);

    return params;
}
#else
/* Non-universal variant: creates parser state with no config argument. */
rb_parser_t *
rb_parser_params_new(void)
{
    rb_parser_t *params = rb_ruby_parser_new();

    return params;
}
#endif /* UNIVERSAL_PARSER */
/*
 * Creates a new "parser" typed-data object wrapping freshly created
 * parser state and returns it.
 */
VALUE
rb_parser_new(void)
{
    struct ruby_parser *wrapper;
    /*
     * The parser state must exist before the wrapping object is
     * allocated: rb_ruby_parser_new can run GC, and if vparser were
     * created first, parser_mark would try to mark parser_params that
     * have not been initialized yet.
     */
    rb_parser_t *const params = rb_parser_params_new();
    VALUE vparser = TypedData_Make_Struct(0, struct ruby_parser,
                                          &ruby_parser_data_type, wrapper);

    wrapper->parser_params = params;
    return vparser;
}
/*
 * Unwraps vparser and forwards the four option flags to
 * rb_ruby_parser_set_options().
 */
void
rb_parser_set_options(VALUE vparser, int print, int loop, int chomp, int split)
{
    struct ruby_parser *wrapper;
    rb_parser_t *params;

    TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, wrapper);
    params = wrapper->parser_params;
    rb_ruby_parser_set_options(params, print, loop, chomp, split);
}
/*
 * Unwraps vparser, forwards base and main to rb_ruby_parser_set_context(),
 * and returns vparser itself.
 */
VALUE
rb_parser_set_context(VALUE vparser, const struct rb_iseq_struct *base, int main)
{
    struct ruby_parser *wrapper;
    rb_parser_t *params;

    TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, wrapper);
    params = wrapper->parser_params;
    rb_ruby_parser_set_context(params, base, main);
    return vparser;
}
/* Unwraps vparser and calls rb_ruby_parser_set_script_lines() on its state. */
void
rb_parser_set_script_lines(VALUE vparser)
{
    struct ruby_parser *wrapper;
    rb_parser_t *params;

    TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, wrapper);
    params = wrapper->parser_params;
    rb_ruby_parser_set_script_lines(params);
}
/* Unwraps vparser and calls rb_ruby_parser_error_tolerant() on its state. */
void
rb_parser_error_tolerant(VALUE vparser)
{
    struct ruby_parser *wrapper;
    rb_parser_t *params;

    TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, wrapper);
    params = wrapper->parser_params;
    rb_ruby_parser_error_tolerant(params);
}
/* Unwraps vparser and calls rb_ruby_parser_keep_tokens() on its state. */
void
rb_parser_keep_tokens(VALUE vparser)
{
    struct ruby_parser *wrapper;
    rb_parser_t *params;

    TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, wrapper);
    params = wrapper->parser_params;
    rb_ruby_parser_keep_tokens(params);
}
/*
 * Returns the next line of the source string held in ptr_str as a parser
 * string, and advances ptr_str->ptr past it.
 *
 * A line runs up to and including the next '\n', or to the end of the
 * string when no newline remains.  Returns 0 when the stored offset is
 * non-zero and already equals the string length.
 */
rb_parser_string_t *
rb_parser_lex_get_str(struct parser_params *p, struct lex_pointer_string *ptr_str)
{
    VALUE src = ptr_str->str;
    char *const base = RSTRING_PTR(src);
    long remaining = RSTRING_LEN(src);
    char *cur = base;
    char *newline;

    if (ptr_str->ptr) {
        if (remaining == ptr_str->ptr) return 0;
        cur += ptr_str->ptr;
        remaining -= ptr_str->ptr;
    }

    newline = memchr(cur, '\n', remaining);
    if (newline) {
        /* Include the newline itself in the returned line. */
        newline++;
        remaining = newline - cur;
    }
    ptr_str->ptr += remaining;

    return rb_str_to_parser_string(p, rb_str_subseq(src, cur - base, remaining));
}
/*
 * Line-getter used for string sources: interprets input as a
 * struct lex_pointer_string and defers to rb_parser_lex_get_str().
 * line_count is not used.
 */
static rb_parser_string_t *
lex_get_str(struct parser_params *p, rb_parser_input_data input, int line_count)
{
    struct lex_pointer_string *source = (struct lex_pointer_string *)input;

    return rb_parser_lex_get_str(p, source);
}
static void parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *lines);
/*
 * Common compile entry point: extracts pointer, length and encoding from
 * fname (all left zero when fname is nil), runs rb_parser_compile(), then
 * passes the resulting script lines to parser_aset_script_lines_for().
 */
static rb_ast_t*
parser_compile(rb_parser_t *p, rb_parser_lex_gets_func *gets, VALUE fname, rb_parser_input_data input, int line)
{
    const char *path = 0;
    long path_len = 0;
    rb_encoding *path_enc = 0;
    rb_ast_t *result;

    if (!NIL_P(fname)) {
        StringValueCStr(fname);
        path = RSTRING_PTR(fname);
        path_len = RSTRING_LEN(fname);
        path_enc = rb_enc_get(fname);
    }

    result = rb_parser_compile(p, gets, path, path_len, path_enc, input, line);
    parser_aset_script_lines_for(fname, result->body.script_lines);

    return result;
}
/*
 * Compiles source string s: stores a frozen copy and a zero read offset in
 * the parser wrapper, then compiles with lex_get_str as the line getter.
 */
static rb_ast_t*
parser_compile_string0(struct ruby_parser *parser, VALUE fname, VALUE s, int line)
{
    /* Obtain the frozen string before touching the wrapper's fields. */
    VALUE frozen = rb_str_new_frozen(s);
    rb_parser_input_data input;

    parser->type = lex_type_str;
    parser->data.lex_str.str = frozen;
    parser->data.lex_str.ptr = 0;

    input = (rb_parser_input_data)&parser->data;
    return parser_compile(parser->parser_params, lex_get_str, fname, input, line);
}
/*
 * Returns the encoding of s; raises ArgumentError ("invalid source
 * encoding") when that encoding is not ASCII-compatible.
 */
static rb_encoding *
must_be_ascii_compatible(VALUE s)
{
    rb_encoding *const enc = rb_enc_get(s);
    const int compatible = rb_enc_asciicompat(enc);

    if (!compatible) {
        rb_raise(rb_eArgError, "invalid source encoding");
    }
    return enc;
}
/*
 * Checks that s has an ASCII-compatible encoding, then compiles it under
 * file name f.
 */
static rb_ast_t*
parser_compile_string_path(struct ruby_parser *parser, VALUE f, VALUE s, int line)
{
    rb_ast_t *ast;

    must_be_ascii_compatible(s);
    ast = parser_compile_string0(parser, f, s, line);
    return ast;
}
/*
 * Like parser_compile_string_path(), but takes the file name as a C string
 * and converts it with rb_filesystem_str_new_cstr() first.
 */
static rb_ast_t*
parser_compile_string(struct ruby_parser *parser, const char *f, VALUE s, int line)
{
    VALUE fname = rb_filesystem_str_new_cstr(f);

    return parser_compile_string_path(parser, fname, s, line);
}
VALUE rb_io_gets_internal(VALUE io);
/*
 * Line-getter used for IO sources: reads one line with
 * rb_io_gets_internal() and converts it to a parser string.  Returns 0
 * when the read yields nil.  line_count is not used.
 */
static rb_parser_string_t *
lex_io_gets(struct parser_params *p, rb_parser_input_data input, int line_count)
{
    VALUE line = rb_io_gets_internal((VALUE)input);

    if (!NIL_P(line)) {
        return rb_str_to_parser_string(p, line);
    }
    return 0;
}
/*
 * Line-getter used for array sources: fetches element index of the array.
 * Returns 0 for a nil element; otherwise converts the element with
 * StringValue, raises ArgumentError ("invalid source encoding") unless its
 * encoding is ASCII-compatible, and returns it as a parser string.
 */
static rb_parser_string_t *
lex_gets_array(struct parser_params *p, rb_parser_input_data data, int index)
{
    VALUE lines = (VALUE)data;
    VALUE line = rb_ary_entry(lines, index);

    if (NIL_P(line)) {
        return 0;
    }

    StringValue(line);
    if (!rb_enc_asciicompat(rb_enc_get(line))) {
        rb_raise(rb_eArgError, "invalid source encoding");
    }
    return rb_str_to_parser_string(p, line);
}
/*
 * Compiles from an IO object: records it in the wrapper (so parser_mark
 * can mark it) and compiles with lex_io_gets as the line getter.
 */
static rb_ast_t*
parser_compile_file_path(struct ruby_parser *parser, VALUE fname, VALUE file, int start)
{
    rb_parser_input_data input = (rb_parser_input_data)file;

    parser->type = lex_type_io;
    parser->data.lex_io.file = file;
    return parser_compile(parser->parser_params, lex_io_gets, fname, input, start);
}
/*
 * Compiles from an array of lines: records the array in the wrapper (so
 * parser_mark can mark it) and compiles with lex_gets_array as the line
 * getter.
 */
static rb_ast_t*
parser_compile_array(struct ruby_parser *parser, VALUE fname, VALUE array, int start)
{
    rb_parser_input_data input = (rb_parser_input_data)array;

    parser->type = lex_type_array;
    parser->data.lex_array.ary = array;
    return parser_compile(parser->parser_params, lex_gets_array, fname, input, start);
}
/*
 * Compiles with a caller-supplied line getter.  The input is not stored
 * in the wrapper, so parser_mark does not mark it.
 */
static rb_ast_t*
parser_compile_generic(struct ruby_parser *parser, rb_parser_lex_gets_func *lex_gets, VALUE fname, VALUE input, int start)
{
    rb_parser_input_data data = (rb_parser_input_data)input;

    parser->type = lex_type_generic;
    return parser_compile(parser->parser_params, lex_gets, fname, data, start);
}
/* Free function of the "AST" typed data: hands the pointer to rb_ast_free(). */
static void
ast_free(void *ptr)
{
    rb_ast_free((rb_ast_t *)ptr);
}
/* Typed-data descriptor for the objects that wrap an rb_ast_t. */
static const rb_data_type_t ast_data_type = {
    "AST",
    {
        NULL,      /* no mark function */
        ast_free,  /* free */
        NULL, // No dsize() because this object does not appear in ObjectSpace.
    },
    0, 0, RUBY_TYPED_FREE_IMMEDIATELY
};
/*
 * Allocates an "AST" typed-data object with a NULL data pointer; callers
 * fill in DATA_PTR afterwards.
 */
static VALUE
ast_alloc(void)
{
    VALUE ast_value = TypedData_Wrap_Struct(0, &ast_data_type, NULL);

    return ast_value;
}
/*
 * Compiles the contents of the IO object file and returns an "AST" object
 * whose data pointer is the resulting rb_ast_t.
 */
VALUE
rb_parser_compile_file_path(VALUE vparser, VALUE fname, VALUE file, int start)
{
    struct ruby_parser *wrapper;
    rb_ast_t *ast;
    VALUE ast_value = ast_alloc();

    TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, wrapper);
    ast = parser_compile_file_path(wrapper, fname, file, start);
    DATA_PTR(ast_value) = ast;
    RB_GC_GUARD(vparser);

    return ast_value;
}
/*
 * Compiles the lines held in array and returns an "AST" object whose data
 * pointer is the resulting rb_ast_t.
 */
VALUE
rb_parser_compile_array(VALUE vparser, VALUE fname, VALUE array, int start)
{
    struct ruby_parser *wrapper;
    rb_ast_t *ast;
    VALUE ast_value = ast_alloc();

    TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, wrapper);
    ast = parser_compile_array(wrapper, fname, array, start);
    DATA_PTR(ast_value) = ast;
    RB_GC_GUARD(vparser);

    return ast_value;
}
/*
 * Compiles input using the caller-supplied line getter lex_gets and
 * returns an "AST" object whose data pointer is the resulting rb_ast_t.
 */
VALUE
rb_parser_compile_generic(VALUE vparser, rb_parser_lex_gets_func *lex_gets, VALUE fname, VALUE input, int start)
{
    struct ruby_parser *wrapper;
    rb_ast_t *ast;
    VALUE ast_value = ast_alloc();

    TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, wrapper);
    ast = parser_compile_generic(wrapper, lex_gets, fname, input, start);
    DATA_PTR(ast_value) = ast;
    RB_GC_GUARD(vparser);

    return ast_value;
}
/*
 * Compiles source string s under the C-string file name f and returns an
 * "AST" object whose data pointer is the resulting rb_ast_t.
 */
VALUE
rb_parser_compile_string(VALUE vparser, const char *f, VALUE s, int line)
{
    struct ruby_parser *wrapper;
    rb_ast_t *ast;
    VALUE ast_value = ast_alloc();

    TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, wrapper);
    ast = parser_compile_string(wrapper, f, s, line);
    DATA_PTR(ast_value) = ast;
    RB_GC_GUARD(vparser);

    return ast_value;
}
/*
 * Compiles source string s under the file name f (a VALUE) and returns an
 * "AST" object whose data pointer is the resulting rb_ast_t.
 */
VALUE
rb_parser_compile_string_path(VALUE vparser, VALUE f, VALUE s, int line)
{
    struct ruby_parser *wrapper;
    rb_ast_t *ast;
    VALUE ast_value = ast_alloc();

    TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, wrapper);
    ast = parser_compile_string_path(wrapper, f, s, line);
    DATA_PTR(ast_value) = ast;
    RB_GC_GUARD(vparser);

    return ast_value;
}
/*
 * Returns the result of rb_ruby_parser_encoding() for the wrapped parser,
 * converted with rb_enc_from_encoding().
 */
VALUE
rb_parser_encoding(VALUE vparser)
{
    struct ruby_parser *wrapper;
    rb_parser_t *params;

    TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, wrapper);
    params = wrapper->parser_params;
    return rb_enc_from_encoding(rb_ruby_parser_encoding(params));
}
/*
 * Returns the result of rb_ruby_parser_end_seen_p() for the wrapped
 * parser, converted to a Ruby boolean with RBOOL().
 */
VALUE
rb_parser_end_seen_p(VALUE vparser)
{
    struct ruby_parser *wrapper;
    rb_parser_t *params;

    TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, wrapper);
    params = wrapper->parser_params;
    return RBOOL(rb_ruby_parser_end_seen_p(params));
}
/*
 * Passes RTEST(flag) to rb_ruby_parser_set_yydebug() for the wrapped
 * parser and returns flag unchanged.
 */
VALUE
rb_parser_set_yydebug(VALUE vparser, VALUE flag)
{
    struct ruby_parser *wrapper;
    rb_parser_t *params;

    TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, wrapper);
    params = wrapper->parser_params;
    rb_ruby_parser_set_yydebug(params, RTEST(flag));
    return flag;
}
/*
 * If Object::SCRIPT_LINES__ is defined and is a Hash, stores true under
 * path in it and enables script-line collection on the wrapped parser.
 * Does nothing otherwise.
 */
void
rb_set_script_lines_for(VALUE vparser, VALUE path)
{
    struct ruby_parser *wrapper;
    VALUE table;
    ID script_lines;

    CONST_ID(script_lines, "SCRIPT_LINES__");
    if (!rb_const_defined_at(rb_cObject, script_lines)) return;

    table = rb_const_get_at(rb_cObject, script_lines);
    if (!RB_TYPE_P(table, T_HASH)) return;

    rb_hash_aset(table, path, Qtrue);
    TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, wrapper);
    rb_ruby_parser_set_script_lines(wrapper->parser_params);
}
/*
 * Builds a Ruby Array of Strings from a parser array of script lines.
 *
 * Returns Qnil when lines is NULL.  Calls rb_bug() when the array's
 * data_type is not PARSER_ARY_DATA_SCRIPT_LINE.  Each element is copied
 * into a new String with the element's own encoding.
 */
VALUE
rb_parser_build_script_lines_from(rb_parser_ary_t *lines)
{
    /*
     * The index is a long rather than an int so that it cannot overflow
     * before reaching lines->len, which is also what is passed as the
     * (long) capacity of rb_ary_new_capa().
     * NOTE(review): assumes lines->len is declared as long -- confirm
     * against the rb_parser_ary_t definition.
     */
    long i;
    if (!lines) return Qnil;
    if (lines->data_type != PARSER_ARY_DATA_SCRIPT_LINE) {
        /* Explicit cast: the argument must be an int to match "%d". */
        rb_bug("unexpected rb_parser_ary_data_type (%d) for script lines", (int)lines->data_type);
    }
    VALUE script_lines = rb_ary_new_capa(lines->len);
    for (i = 0; i < lines->len; i++) {
        rb_parser_string_t *str = (rb_parser_string_t *)lines->data[i];
        rb_ary_push(script_lines, rb_enc_str_new(str->ptr, str->len, str->enc));
    }
    return script_lines;
}
/*
 * Creates a literal String from a parser string via rb_enc_literal_str()
 * and calls rb_enc_str_coderange() on it before returning it.
 */
VALUE
rb_str_new_parser_string(rb_parser_string_t *str)
{
    VALUE literal = rb_enc_literal_str(str->ptr, str->len, str->enc);

    rb_enc_str_coderange(literal);
    return literal;
}
/*
 * Creates a String from a parser string with rb_enc_str_new(), i.e. not
 * through the literal-string path used by rb_str_new_parser_string().
 */
VALUE
rb_str_new_mutable_parser_string(rb_parser_string_t *str)
{
    VALUE copy = rb_enc_str_new(str->ptr, str->len, str->enc);

    return copy;
}
/*
 * Returns the negation of a numeric literal value.
 *
 * Fixnums, flonums (when USE_FLONUM) and heap Floats yield a new value.
 * Bignums are negated in place and then normalized.  Rationals have their
 * numerator replaced, and Complexes their real and imaginary parts,
 * recursively.  Any other value is reported through rb_bug().
 */
static VALUE
negative_numeric(VALUE val)
{
    if (SPECIAL_CONST_P(val)) {
        if (FIXNUM_P(val)) {
            return LONG2FIX(-FIX2LONG(val));
        }
#if USE_FLONUM
        if (FLONUM_P(val)) {
            return DBL2NUM(-RFLOAT_VALUE(val));
        }
#endif
        /* Any other special constant falls through to rb_bug() below. */
    }
    else {
        switch (BUILTIN_TYPE(val)) {
          case T_BIGNUM:
            BIGNUM_NEGATE(val);
            return rb_big_norm(val);
          case T_RATIONAL:
            RATIONAL_SET_NUM(val, negative_numeric(RRATIONAL(val)->num));
            return val;
          case T_COMPLEX:
            RCOMPLEX_SET_REAL(val, negative_numeric(RCOMPLEX(val)->real));
            RCOMPLEX_SET_IMAG(val, negative_numeric(RCOMPLEX(val)->imag));
            return val;
          case T_FLOAT:
            return DBL2NUM(-RFLOAT_VALUE(val));
          default:
            break;
        }
    }

    rb_bug("unknown literal type (%s) passed to negative_numeric",
           rb_builtin_class_name(val));
    return val;
}
/*
 * Converts the digit string val in the given base to an Integer through
 * rb_cstr_to_inum(), passing FALSE as its third argument.
 */
static VALUE
integer_value(const char *val, int base)
{
    VALUE num = rb_cstr_to_inum(val, base, FALSE);

    return num;
}
/*
 * Builds a Rational from the literal text node_val.
 *
 * When seen_point > 0 it is used as the index of the decimal point: the
 * point is removed from a private copy of the text, the remaining digits
 * become the numerator and 10**(number of fraction digits) the
 * denominator.  Otherwise the whole text is the numerator of a raw
 * rational.
 */
static VALUE
rational_value(const char *node_val, int base, int seen_point)
{
    VALUE result;
    /* NOTE(review): the strdup() result is used without a NULL check. */
    char *digits = strdup(node_val);

    if (seen_point <= 0) {
        result = rb_rational_raw1(integer_value(digits, base));
    }
    else {
        int total = (int)(strlen(digits));
        char *dot = &digits[seen_point];
        size_t fraclen = total-seen_point-1;

        /* Shift the fraction digits and the terminating NUL over the point. */
        memmove(dot, dot+1, fraclen+1);
        result = rb_rational_new(integer_value(digits, base), rb_int_positive_pow(10, fraclen));
    }

    free(digits);
    return result;
}
/*
 * Returns the Integer value of an integer literal node, negated when the
 * node's minus flag is set.
 */
VALUE
rb_node_integer_literal_val(const NODE *n)
{
    const rb_node_integer_t *literal = RNODE_INTEGER(n);
    VALUE magnitude = integer_value(literal->val, literal->base);

    if (!literal->minus) {
        return magnitude;
    }
    return negative_numeric(magnitude);
}
/*
 * Returns the Float value of a float literal node: the text is parsed
 * with strtod() and the sign flipped when the node's minus flag is set.
 */
VALUE
rb_node_float_literal_val(const NODE *n)
{
    const rb_node_float_t *literal = RNODE_FLOAT(n);
    double magnitude = strtod(literal->val, 0);
    double signed_value = literal->minus ? -magnitude : magnitude;

    return DBL2NUM(signed_value);
}
/*
 * Returns the Rational value of a rational literal node, negated when the
 * node's minus flag is set.
 */
VALUE
rb_node_rational_literal_val(const NODE *n)
{
    const rb_node_rational_t *literal = RNODE_RATIONAL(n);
    VALUE magnitude = rational_value(literal->val, literal->base, literal->seen_point);

    if (!literal->minus) {
        return magnitude;
    }
    return negative_numeric(magnitude);
}
VALUE
rb_node_imaginary_literal_val(const NODE *n)
{
VALUE lit;
const rb_node_imaginary_t *node = RNODE_IMAGINARY(n);
enum rb_numeric_type type = node->type;
switch (type) {
case integer_literal:
lit = integer_value(node->val, node->base);
break;
case float_literal:{
double d = strtod(node->val, 0);
lit = DBL2NUM(d);
break;
}
case rational_literal:
lit = rational_value(node->val, node->base, node->seen_point);
break;
default:
rb_bug("unreachable");
}
lit = rb_complex_raw(INT2FIX(0), lit);
if (node->minus) {
lit = negative_numeric(lit);
}
return lit;
}
/* Returns the String for a STR node, built by rb_str_new_parser_string(). */
VALUE
rb_node_str_string_val(const NODE *node)
{
    return rb_str_new_parser_string(RNODE_STR(node)->string);
}
VALUE
rb_node_sym_string_val(const NODE *node)
{
rb_parser_string_t *str = RNODE_SYM(node)->string;
return ID2SYM(rb_intern3(str->ptr, str->len, str->enc));
}
/*
 * Returns the String for a DSTR node's string, or Qnil when the node has
 * no string.
 */
VALUE
rb_node_dstr_string_val(const NODE *node)
{
    rb_parser_string_t *text = RNODE_DSTR(node)->string;

    if (!text) {
        return Qnil;
    }
    return rb_str_new_parser_string(text);
}
/*
 * Returns the String for a DREGX node's string, built by
 * rb_str_new_parser_string().  The string pointer is passed on unchecked.
 */
VALUE
rb_node_dregx_string_val(const NODE *node)
{
    return rb_str_new_parser_string(RNODE_DREGX(node)->string);
}
/*
 * Returns the Regexp for a REGX node: the node's text is copied into a
 * String and compiled with rb_reg_compile() using the node's options.
 */
VALUE
rb_node_regx_string_val(const NODE *node)
{
    rb_node_regx_t *regx = RNODE_REGX(node);
    rb_parser_string_t *pattern = regx->string;
    VALUE source = rb_enc_str_new(pattern->ptr, pattern->len, pattern->enc);

    return rb_reg_compile(source, regx->options, NULL, 0);
}
VALUE
rb_node_line_lineno_val(const NODE *node)
{
return INT2FIX(node->nd_loc.beg_pos.lineno);
}
/* Returns the String for a FILE node's path, built by rb_str_new_parser_string(). */
VALUE
rb_node_file_path_val(const NODE *node)
{
    VALUE path = rb_str_new_parser_string(RNODE_FILE(node)->path);

    return path;
}
/* Returns the ENCODING node's encoding, converted with rb_enc_from_encoding(). */
VALUE
rb_node_encoding_val(const NODE *node)
{
    VALUE encoding = rb_enc_from_encoding(RNODE_ENCODING(node)->enc);

    return encoding;
}
/*
 * Stores the script lines of a compiled file into Object::SCRIPT_LINES__.
 *
 * Nothing happens unless path is non-nil, lines is non-NULL, the constant
 * is defined and is a Hash, and the Hash lookup for path yields a non-nil
 * value.
 */
static void
parser_aset_script_lines_for(VALUE path, rb_parser_ary_t *lines)
{
    VALUE table;
    ID script_lines_id;

    if (NIL_P(path)) return;
    if (!lines) return;

    CONST_ID(script_lines_id, "SCRIPT_LINES__");
    if (!rb_const_defined_at(rb_cObject, script_lines_id)) return;

    table = rb_const_get_at(rb_cObject, script_lines_id);
    if (!RB_TYPE_P(table, T_HASH)) return;
    if (NIL_P(rb_hash_lookup(table, path))) return;

    rb_hash_aset(table, path, rb_parser_build_script_lines_from(lines));
}
/*
 * Allocates an "AST" typed-data object holding a new rb_ast_t whose body
 * points at root.  frozen_string_literal and coverage_enabled start at -1,
 * script_lines at NULL and line_count at 0.  Under UNIVERSAL_PARSER the
 * AST's config is set to rb_global_parser_config.
 */
VALUE
rb_ruby_ast_new(const NODE *const root)
{
    rb_ast_t *new_ast;
    VALUE ast_value = TypedData_Make_Struct(0, rb_ast_t, &ast_data_type, new_ast);

#ifdef UNIVERSAL_PARSER
    new_ast->config = &rb_global_parser_config;
#endif
    new_ast->body = (rb_ast_body_t){
        .root = root,
        .frozen_string_literal = -1,
        .coverage_enabled = -1,
        .script_lines = NULL,
        .line_count = 0,
    };

    return ast_value;
}
/*
 * Returns the rb_ast_t wrapped by ast_value, or NULL when ast_value is
 * nil.  The object is type-checked through TypedData_Get_Struct.
 */
rb_ast_t *
rb_ruby_ast_data_get(VALUE ast_value)
{
    rb_ast_t *unwrapped;

    if (!NIL_P(ast_value)) {
        TypedData_Get_Struct(ast_value, rb_ast_t, &ast_data_type, unwrapped);
        return unwrapped;
    }
    return NULL;
}