From 3b815ed7da8261f45b84dcde2c900934f7379dac Mon Sep 17 00:00:00 2001 From: Jemma Issroff Date: Mon, 28 Aug 2023 16:55:58 -0400 Subject: [PATCH] Add yarp/yarp_compiler.c (#8042) * Add yarp/yarp_compiler.c as stencil for compiling YARP This commit adds yarp/yarp_compiler.c, and changes the sync script to ensure that yarp/yarp_compiler.c will not get overwritten * [Misc #119772] Create and expose RubyVM::InstructionSequence.compile_yarp This commit creates the stencil for a compile_yarp function, which we will continue to fill out. It allows us to check the output of compiled YARP code against compiled code without using YARP. --- common.mk | 45 ++++++++++++++++++++++++++ compile.c | 19 +++++++++++ iseq.c | 67 ++++++++++++++++++++++++++++++++++++++- tool/sync_default_gems.rb | 2 ++ yarp/yarp_compiler.c | 18 +++++++++++ 5 files changed, 150 insertions(+), 1 deletion(-) create mode 100644 yarp/yarp_compiler.c diff --git a/common.mk b/common.mk index 1a85d0605a..d55d1788aa 100644 --- a/common.mk +++ b/common.mk @@ -3182,6 +3182,26 @@ compile.$(OBJEXT): $(top_srcdir)/internal/thread.h compile.$(OBJEXT): $(top_srcdir)/internal/variable.h compile.$(OBJEXT): $(top_srcdir)/internal/vm.h compile.$(OBJEXT): $(top_srcdir)/internal/warnings.h +compile.$(OBJEXT): $(top_srcdir)/yarp/defines.h +compile.$(OBJEXT): $(top_srcdir)/yarp/diagnostic.h +compile.$(OBJEXT): $(top_srcdir)/yarp/enc/yp_encoding.h +compile.$(OBJEXT): $(top_srcdir)/yarp/node.h +compile.$(OBJEXT): $(top_srcdir)/yarp/pack.h +compile.$(OBJEXT): $(top_srcdir)/yarp/parser.h +compile.$(OBJEXT): $(top_srcdir)/yarp/regexp.h +compile.$(OBJEXT): $(top_srcdir)/yarp/unescape.h +compile.$(OBJEXT): $(top_srcdir)/yarp/util/yp_buffer.h +compile.$(OBJEXT): $(top_srcdir)/yarp/util/yp_char.h +compile.$(OBJEXT): $(top_srcdir)/yarp/util/yp_constant_pool.h +compile.$(OBJEXT): $(top_srcdir)/yarp/util/yp_list.h +compile.$(OBJEXT): $(top_srcdir)/yarp/util/yp_memchr.h +compile.$(OBJEXT): $(top_srcdir)/yarp/util/yp_newline_list.h 
+compile.$(OBJEXT): $(top_srcdir)/yarp/util/yp_state_stack.h +compile.$(OBJEXT): $(top_srcdir)/yarp/util/yp_string.h +compile.$(OBJEXT): $(top_srcdir)/yarp/util/yp_string_list.h +compile.$(OBJEXT): $(top_srcdir)/yarp/util/yp_strpbrk.h +compile.$(OBJEXT): $(top_srcdir)/yarp/yarp.h +compile.$(OBJEXT): $(top_srcdir)/yarp/yarp_compiler.c compile.$(OBJEXT): {$(VPATH)}assert.h compile.$(OBJEXT): {$(VPATH)}atomic.h compile.$(OBJEXT): {$(VPATH)}backward/2/assume.h @@ -3379,6 +3399,9 @@ compile.$(OBJEXT): {$(VPATH)}vm_callinfo.h compile.$(OBJEXT): {$(VPATH)}vm_core.h compile.$(OBJEXT): {$(VPATH)}vm_debug.h compile.$(OBJEXT): {$(VPATH)}vm_opts.h +compile.$(OBJEXT): {$(VPATH)}yarp/ast.h +compile.$(OBJEXT): {$(VPATH)}yarp/version.h +compile.$(OBJEXT): {$(VPATH)}yarp/yarp.h complex.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h complex.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h complex.$(OBJEXT): $(CCAN_DIR)/list/list.h @@ -8206,6 +8229,25 @@ iseq.$(OBJEXT): $(top_srcdir)/internal/thread.h iseq.$(OBJEXT): $(top_srcdir)/internal/variable.h iseq.$(OBJEXT): $(top_srcdir)/internal/vm.h iseq.$(OBJEXT): $(top_srcdir)/internal/warnings.h +iseq.$(OBJEXT): $(top_srcdir)/yarp/defines.h +iseq.$(OBJEXT): $(top_srcdir)/yarp/diagnostic.h +iseq.$(OBJEXT): $(top_srcdir)/yarp/enc/yp_encoding.h +iseq.$(OBJEXT): $(top_srcdir)/yarp/node.h +iseq.$(OBJEXT): $(top_srcdir)/yarp/pack.h +iseq.$(OBJEXT): $(top_srcdir)/yarp/parser.h +iseq.$(OBJEXT): $(top_srcdir)/yarp/regexp.h +iseq.$(OBJEXT): $(top_srcdir)/yarp/unescape.h +iseq.$(OBJEXT): $(top_srcdir)/yarp/util/yp_buffer.h +iseq.$(OBJEXT): $(top_srcdir)/yarp/util/yp_char.h +iseq.$(OBJEXT): $(top_srcdir)/yarp/util/yp_constant_pool.h +iseq.$(OBJEXT): $(top_srcdir)/yarp/util/yp_list.h +iseq.$(OBJEXT): $(top_srcdir)/yarp/util/yp_memchr.h +iseq.$(OBJEXT): $(top_srcdir)/yarp/util/yp_newline_list.h +iseq.$(OBJEXT): $(top_srcdir)/yarp/util/yp_state_stack.h +iseq.$(OBJEXT): $(top_srcdir)/yarp/util/yp_string.h +iseq.$(OBJEXT): 
$(top_srcdir)/yarp/util/yp_string_list.h
+iseq.$(OBJEXT): $(top_srcdir)/yarp/util/yp_strpbrk.h
+iseq.$(OBJEXT): $(top_srcdir)/yarp/yarp.h
 iseq.$(OBJEXT): {$(VPATH)}assert.h
 iseq.$(OBJEXT): {$(VPATH)}atomic.h
 iseq.$(OBJEXT): {$(VPATH)}backward/2/assume.h
@@ -8400,6 +8442,9 @@ iseq.$(OBJEXT): {$(VPATH)}util.h
 iseq.$(OBJEXT): {$(VPATH)}vm_callinfo.h
 iseq.$(OBJEXT): {$(VPATH)}vm_core.h
 iseq.$(OBJEXT): {$(VPATH)}vm_opts.h
+iseq.$(OBJEXT): {$(VPATH)}yarp/ast.h
+iseq.$(OBJEXT): {$(VPATH)}yarp/version.h
+iseq.$(OBJEXT): {$(VPATH)}yarp/yarp.h
 iseq.$(OBJEXT): {$(VPATH)}yjit.h
 load.$(OBJEXT): $(CCAN_DIR)/check_type/check_type.h
 load.$(OBJEXT): $(CCAN_DIR)/container_of/container_of.h
diff --git a/compile.c b/compile.c
index 1b382765c7..d7f345d0af 100644
--- a/compile.c
+++ b/compile.c
@@ -43,6 +43,9 @@
 #include "builtin.h"
 #include "insns.inc"
 #include "insns_info.inc"
+#include "yarp/yarp.h"
+
+VALUE rb_iseq_compile_yarp_node(rb_iseq_t * iseq, const yp_node_t * yarp_pointer);
 
 #undef RUBY_UNTYPED_DATA_WARNING
 #define RUBY_UNTYPED_DATA_WARNING 0
@@ -855,6 +858,20 @@ rb_iseq_compile_node(rb_iseq_t *iseq, const NODE *node)
     return iseq_setup(iseq, ret);
 }
 
+static VALUE rb_translate_yarp(rb_iseq_t *iseq, const yp_node_t *node, LINK_ANCHOR *const ret);
+/* Compiles a YARP AST into iseq: rb_translate_yarp, then iseq_setup_insn, then returns iseq_setup's result. */
+VALUE
+rb_iseq_compile_yarp_node(rb_iseq_t * iseq, const yp_node_t * yarp_pointer)
+{
+    DECL_ANCHOR(ret);
+    INIT_ANCHOR(ret);
+    /* NOTE(review): CHECK presumably returns early on a falsy result -- confirm against its definition. */
+    CHECK(rb_translate_yarp(iseq, yarp_pointer, ret));
+
+    CHECK(iseq_setup_insn(iseq, ret));
+    return iseq_setup(iseq, ret);
+}
+
 static int
 rb_iseq_translate_threaded_code(rb_iseq_t *iseq)
 {
@@ -13288,3 +13305,5 @@ rb_iseq_ibf_load_extra_data(VALUE str)
     RB_GC_GUARD(loader_obj);
     return extra_str;
 }
+/* Textually included: this file defines the static rb_translate_yarp declared above. */
+#include "yarp/yarp_compiler.c"
diff --git a/iseq.c b/iseq.c
index 80bd280a14..0b7ea138fa 100644
--- a/iseq.c
+++ b/iseq.c
@@ -43,6 +43,9 @@
 #include "builtin.h"
 #include "insns.inc"
 #include "insns_info.inc"
+#include "yarp/yarp.h"
+
+VALUE rb_iseq_compile_yarp_node(rb_iseq_t * iseq, const yp_node_t *
yarp_pointer);
 
 VALUE rb_cISeq;
 static VALUE iseqw_new(const rb_iseq_t *iseq);
@@ -1326,7 +1329,7 @@ rb_iseqw_new(const rb_iseq_t *iseq)
 static VALUE
 iseqw_s_compile(int argc, VALUE *argv, VALUE self)
 {
-    VALUE src, file = Qnil, path = Qnil, line = INT2FIX(1), opt = Qnil;
+    VALUE src, file = Qnil, path = Qnil, line = Qnil, opt = Qnil;
     int i;
 
     i = rb_scan_args(argc, argv, "1*:", &src, NULL, &opt);
@@ -1348,6 +1351,67 @@ iseqw_s_compile(int argc, VALUE *argv, VALUE self)
     return iseqw_new(rb_iseq_compile_with_option(src, file, path, line, opt));
 }
 
+/*
+ * InstructionSequence.compile_yarp(source[, file[, path[, line[, options]]]])
+ * Parses source with YARP, builds a top-level iseq from the resulting tree and
+ * returns the wrapper from iseqw_new. source must be (or convert to) a String.
+ */
+static VALUE
+iseqw_s_compile_yarp(int argc, VALUE *argv, VALUE self)
+{
+    VALUE src, file = Qnil, path = Qnil, line = Qnil, opt = Qnil;
+    int i;
+
+    i = rb_scan_args(argc, argv, "1*:", &src, NULL, &opt);
+    if (i > 4+NIL_P(opt)) rb_error_arity(argc, 1, 5);
+    switch (i) {
+      case 5: opt = argv[--i]; /* fall through */
+      case 4: line = argv[--i]; /* fall through */
+      case 3: path = argv[--i]; /* fall through */
+      case 2: file = argv[--i];
+    }
+
+    if (NIL_P(file)) file = rb_fstring_lit("");
+    if (NIL_P(path)) path = file;
+    if (NIL_P(line)) line = INT2FIX(1);
+
+    /* src is read through RSTRING_PTR/RSTRING_LEN below, so it must be a String. */
+    StringValue(src);
+    Check_Type(path, T_STRING);
+    Check_Type(file, T_STRING);
+
+    /* Convert arguments before the parser is initialized: a raise here (e.g.
+     * from NUM2INT) would otherwise skip yp_node_destroy/yp_parser_free. */
+    int first_lineno = NUM2INT(line);
+    rb_compile_option_t option;
+    make_compile_option(&option, opt);
+
+    rb_iseq_t *iseq = iseq_alloc();
+    VALUE name = rb_fstring_lit("");
+    yp_parser_t parser;
+    yp_parser_init(&parser, RSTRING_PTR(src), (size_t)RSTRING_LEN(src), "");
+    yp_node_t *node = yp_parse(&parser);
+
+    yp_line_column_t start_loc = yp_newline_list_line_column(&parser.newline_list, node->location.start);
+    yp_line_column_t end_loc = yp_newline_list_line_column(&parser.newline_list, node->location.end);
+    rb_code_location_t node_location;
+    node_location.beg_pos.lineno = (int)start_loc.line;
+    node_location.beg_pos.column = (int)start_loc.column;
+    node_location.end_pos.lineno = (int)end_loc.line;
+    node_location.end_pos.column = (int)end_loc.column;
+
+    /* node_id 0, no parent iseq, ISEQ_TYPE_TOP. */
+    prepare_iseq_build(iseq, name, file, path, first_lineno, &node_location, 0,
+                       NULL, 0, ISEQ_TYPE_TOP, Qnil, &option);
+    rb_iseq_compile_yarp_node(iseq, node);
+
+    yp_node_destroy(&parser, node);
+    yp_parser_free(&parser);
+    /* The parser pointed into src's buffer; keep src alive up to here. */
+    RB_GC_GUARD(src);
+    return iseqw_new(iseq);
+}
+
 /*
  *  call-seq:
  *     InstructionSequence.compile_file(file[, options]) -> iseq
@@ -3920,6 +3984,7 @@ Init_ISeq(void)
     (void)iseq_s_load;
 
     rb_define_singleton_method(rb_cISeq, "compile", iseqw_s_compile, -1);
+    rb_define_singleton_method(rb_cISeq, "compile_yarp", iseqw_s_compile_yarp, -1);
     rb_define_singleton_method(rb_cISeq, "new", iseqw_s_compile, -1);
     rb_define_singleton_method(rb_cISeq, "compile_file", iseqw_s_compile_file, -1);
     rb_define_singleton_method(rb_cISeq, "compile_option", iseqw_s_compile_option_get, 0);
diff --git a/tool/sync_default_gems.rb b/tool/sync_default_gems.rb
index ad52c3faff..9f1bde06a8 100755
--- a/tool/sync_default_gems.rb
+++ b/tool/sync_default_gems.rb
@@ -400,6 +400,7 @@ module SyncDefaultGems
       # We don't want to remove yarp_init.c, so we temporarily move it
       # out of the yarp dir, wipe the yarp dir, and then put it back
       mv("yarp/yarp_init.c", ".") if File.exist? "yarp/yarp_init.c"
+      mv("yarp/yarp_compiler.c", ".") if File.exist?
"yarp/yarp_compiler.c"
       rm_rf(%w[test/yarp yarp])
 
       # Run the YARP templating scripts
@@ -417,6 +418,7 @@ module SyncDefaultGems
       rm("yarp/extconf.rb")
 
       mv("yarp_init.c", "yarp/")
+      mv("yarp_compiler.c", "yarp/")
     else
       sync_lib gem, upstream
     end
diff --git a/yarp/yarp_compiler.c b/yarp/yarp_compiler.c
new file mode 100644
index 0000000000..ae8575e33a
--- /dev/null
+++ b/yarp/yarp_compiler.c
@@ -0,0 +1,18 @@
+#include "yarp.h"
+/* Stencil: accepts the iseq, node, anchor, source pointer and popped flag, but currently does nothing. */
+static void
+yp_compile_node(rb_iseq_t *iseq, const yp_node_t *node, LINK_ANCHOR *const ret, const char * src, bool popped) {
+    return;
+}
+/* Asserts iseq has compile data and node is a program node, compiles node, calls iseq_set_sequence; returns Qnil. */
+static VALUE
+rb_translate_yarp(rb_iseq_t *iseq, const yp_node_t *node, LINK_ANCHOR *const ret)
+{
+    RUBY_ASSERT(ISEQ_COMPILE_DATA(iseq));
+    RUBY_ASSERT(node->type == YP_NODE_PROGRAM_NODE);
+    /* node->location.start is passed as src; false is the popped argument. */
+    yp_compile_node(iseq, node, ret, node->location.start, false);
+    iseq_set_sequence(iseq, ret);
+
+    return Qnil;
+}