diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb index 7556576f30..c1f5cfe944 100644 --- a/lib/prism/lex_compat.rb +++ b/lib/prism/lex_compat.rb @@ -607,7 +607,7 @@ module Prism state = :default heredoc_stack = [[]] - result = Prism.lex(source, @filepath) + result = Prism.lex(source, filepath: @filepath) result_value = result.value previous_state = nil diff --git a/prism/extension.c b/prism/extension.c index ed166a5176..679d83c1be 100644 --- a/prism/extension.c +++ b/prism/extension.c @@ -15,6 +15,13 @@ VALUE rb_cPrismParseError; VALUE rb_cPrismParseWarning; VALUE rb_cPrismParseResult; +ID id_filepath; +ID id_encoding; +ID id_line; +ID id_frozen_string_literal; +ID id_suppress_warnings; +ID id_scopes; + /******************************************************************************/ /* IO of Ruby code */ /******************************************************************************/ @@ -53,6 +60,133 @@ input_load_string(pm_string_t *input, VALUE string) { pm_string_constant_init(input, RSTRING_PTR(string), RSTRING_LEN(string)); }
+/******************************************************************************/
+/* Building C options from Ruby options */
+/******************************************************************************/
+
+/**
+ * Populate the scopes of the given options from the Ruby value passed as the
+ * `scopes` keyword. The value must be an array of arrays of symbols; a
+ * TypeError is raised as soon as any part of it has a different type.
+ */
+static void
+build_options_scopes(pm_options_t *options, VALUE scopes) {
+    // Reject anything that is not an array of scopes.
+    if (!RB_TYPE_P(scopes, T_ARRAY)) {
+        rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(scopes));
+    }
+
+    // Size the scopes list of the options to match the outer array.
+    size_t outer_length = RARRAY_LEN(scopes);
+    pm_options_scopes_init(options, outer_length);
+
+    // Fill in one scope for every entry of the outer array.
+    for (size_t outer = 0; outer < outer_length; outer++) {
+        VALUE names = rb_ary_entry(scopes, outer);
+
+        // Every scope must itself be an array, otherwise raise a type error.
+        if (!RB_TYPE_P(names, T_ARRAY)) {
+            rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Array)", rb_obj_class(names));
+        }
+
+        // Size the locals list of this scope to match the inner array.
+        pm_options_scope_t *target = &options->scopes[outer];
+        size_t inner_length = RARRAY_LEN(names);
+        pm_options_scope_init(target, inner_length);
+
+        // Fill in one local for every entry of the inner array.
+        for (size_t inner = 0; inner < inner_length; inner++) {
+            VALUE symbol = rb_ary_entry(names, inner);
+
+            // Every local must be a symbol, otherwise raise a type error.
+            if (!RB_TYPE_P(symbol, T_SYMBOL)) {
+                rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected Symbol)", rb_obj_class(symbol));
+            }
+
+            // Store the symbol's name, measured with strlen, as the local at
+            // this position.
+            const char *name = rb_id2name(SYM2ID(symbol));
+            pm_string_constant_init(&target->locals[inner], name, strlen(name));
+        }
+    }
+}
+
+/**
+ * An iterator function that is called for each key-value in the keywords hash.
+ */
+static int
+build_options_i(VALUE key, VALUE value, VALUE argument) {
+    // argument is the pm_options_t * given to rb_hash_foreach. A nil value leaves
+    // its option untouched; boolean options are read with Ruby truthiness.
+    pm_options_t *options = (pm_options_t *) argument;
+    ID key_id = SYM2ID(key);
+
+    if (key_id == id_filepath) {
+        if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value));
+    } else if (key_id == id_encoding) {
+        if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
+    } else if (key_id == id_line) {
+        if (!NIL_P(value)) pm_options_line_set(options, NUM2UINT(value));
+    } else if (key_id == id_frozen_string_literal) {
+        if (!NIL_P(value)) pm_options_frozen_string_literal_set(options, RTEST(value));
+    } else if (key_id == id_suppress_warnings) {
+        if (!NIL_P(value)) pm_options_suppress_warnings_set(options, RTEST(value));
+    } else if (key_id == id_scopes) {
+        if (!NIL_P(value)) build_options_scopes(options, value);
+    } else {
+        rb_raise(rb_eArgError, "unknown keyword: %"PRIsVALUE, key);
+    }
+    return ST_CONTINUE;
+}
+
+/**
+ * rb_protect hands a single VALUE to its callback, so the options pointer and
+ * the keywords hash are bundled here and the struct's address is cast to VALUE.
+ */
+struct build_options_data {
+    pm_options_t *options;
+    VALUE keywords;
+};
+
+/**
+ * Callback for rb_protect: apply every entry of data->keywords to data->options
+ * through build_options_i. Raises a Ruby error if an option is not valid.
+ */
+static VALUE
+build_options(VALUE argument) {
+    struct build_options_data *data = (struct build_options_data *) argument;
+    rb_hash_foreach(data->keywords, build_options_i, (VALUE) data->options);
+    return Qnil;
+}
+
+/**
+ * Fill options from the keywords hash and then the positional filepath; either may be nil.
+ */
+static void
+extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
+    if (!NIL_P(keywords)) {
+        // Trap any Ruby error so the options are freed before it propagates.
+        struct build_options_data bundle = { .options = options, .keywords = keywords };
+        int status = 0;
+        rb_protect(build_options, (VALUE) &bundle, &status);
+        if (status != 0) {
+            pm_options_free(options);
+            rb_jump_tag(status);
+        }
+    }
+
+    // Without a positional filepath there is nothing more to apply.
+    if (NIL_P(filepath)) return;
+
+    // The positional filepath has to be a String.
+    if (!RB_TYPE_P(filepath, T_STRING)) {
+        pm_options_free(options);
+        rb_raise(rb_eTypeError, "wrong argument type %"PRIsVALUE" (expected String)", rb_obj_class(filepath));
+    }
+
+    pm_options_filepath_set(options, RSTRING_PTR(filepath));
+}
+
 /******************************************************************************/ /* Serializing the AST */ /******************************************************************************/ @@ -83,15 +217,19 @@ dump_input(pm_string_t *input, const pm_options_t *options) { /** * call-seq: - * Prism::dump(source, filepath = nil) -> dumped + * Prism::dump(source, **options) -> String * - * Dump the AST corresponding to the given string to a string. + * Dump the AST corresponding to the given string to a string. For supported + * options, see Prism::parse.
*/ static VALUE dump(int argc, VALUE *argv, VALUE self) { VALUE string; - VALUE filepath; - rb_scan_args(argc, argv, "11", &string, &filepath); + VALUE keywords; + rb_scan_args(argc, argv, "1:", &string, &keywords); + + pm_options_t options = { 0 }; + extract_options(&options, Qnil, keywords); pm_string_t input; input_load_string(&input, string); @@ -103,9 +241,6 @@ dump(int argc, VALUE *argv, VALUE self) { pm_string_constant_init(&input, dup, length); #endif - pm_options_t options = { 0 }; - pm_options_filepath_set(&options, check_string(filepath)); - VALUE value = dump_input(&input, &options); pm_options_free(&options); @@ -118,24 +253,29 @@ dump(int argc, VALUE *argv, VALUE self) { /** * call-seq: - * Prism::dump_file(filepath) -> dumped + * Prism::dump_file(filepath, **options) -> String * - * Dump the AST corresponding to the given file to a string. + * Dump the AST corresponding to the given file to a string. For supported + * options, see Prism::parse. */ static VALUE -dump_file(VALUE self, VALUE filepath) { - pm_string_t input; - - const char *checked = check_string(filepath); - if (!pm_string_mapped_init(&input, checked)) return Qnil; +dump_file(int argc, VALUE *argv, VALUE self) { + VALUE filepath; + VALUE keywords; + rb_scan_args(argc, argv, "1:", &filepath, &keywords); pm_options_t options = { 0 }; - pm_options_filepath_set(&options, checked); + extract_options(&options, filepath, keywords); + + pm_string_t input; + if (!pm_string_mapped_init(&input, options.filepath)) { + pm_options_free(&options); + return Qnil; + } VALUE value = dump_input(&input, &options); - - pm_options_free(&options); pm_string_free(&input); + pm_options_free(&options); return value; } @@ -381,22 +521,23 @@ parse_lex_input(pm_string_t *input, const pm_options_t *options, bool return_nod /** * call-seq: - * Prism::lex(source, filepath = nil) -> Array + * Prism::lex(source, **options) -> Array * - * Return an array of Token instances corresponding to the given string. 
+ * Return an array of Token instances corresponding to the given string. For + * supported options, see Prism::parse. */ static VALUE lex(int argc, VALUE *argv, VALUE self) { VALUE string; - VALUE filepath; - rb_scan_args(argc, argv, "11", &string, &filepath); + VALUE keywords; + rb_scan_args(argc, argv, "1:", &string, &keywords); + + pm_options_t options = { 0 }; + extract_options(&options, Qnil, keywords); pm_string_t input; input_load_string(&input, string); - pm_options_t options = { 0 }; - pm_options_filepath_set(&options, check_string(filepath)); - VALUE result = parse_lex_input(&input, &options, false); pm_options_free(&options); @@ -405,24 +546,29 @@ lex(int argc, VALUE *argv, VALUE self) { /** * call-seq: - * Prism::lex_file(filepath) -> Array + * Prism::lex_file(filepath, **options) -> Array * - * Return an array of Token instances corresponding to the given file. + * Return an array of Token instances corresponding to the given file. For + * supported options, see Prism::parse. 
*/ static VALUE -lex_file(VALUE self, VALUE filepath) { - pm_string_t input; - - const char *checked = check_string(filepath); - if (!pm_string_mapped_init(&input, checked)) return Qnil; +lex_file(int argc, VALUE *argv, VALUE self) { + VALUE filepath; + VALUE keywords; + rb_scan_args(argc, argv, "1:", &filepath, &keywords); pm_options_t options = { 0 }; - pm_options_filepath_set(&options, checked); + extract_options(&options, filepath, keywords); + + pm_string_t input; + if (!pm_string_mapped_init(&input, options.filepath)) { + pm_options_free(&options); + return Qnil; + } VALUE value = parse_lex_input(&input, &options, false); - - pm_options_free(&options); pm_string_free(&input); + pm_options_free(&options); return value; } @@ -462,15 +608,32 @@ parse_input(pm_string_t *input, const pm_options_t *options) { /** * call-seq: - * Prism::parse(source, filepath = nil) -> ParseResult + * Prism::parse(source, **options) -> ParseResult * - * Parse the given string and return a ParseResult instance. + * Parse the given string and return a ParseResult instance. The options that + * are supported are: + * + * * `filepath` - the filepath of the source being parsed. This should be a + * string or nil. + * * `encoding` - the encoding of the source being parsed. This should be an + * encoding or nil. + * * `line` - the line number that the parse starts on. This should be an + * integer or nil. Note that this is 1-indexed. + * * `frozen_string_literal` - whether or not the frozen string literal pragma + * has been set. This should be a boolean or nil. + * * `suppress_warnings` - whether or not warnings should be suppressed. This + * should be a boolean or nil. + * * `scopes` - the locals that are in scope surrounding the code that is being + * parsed. This should be an array of arrays of symbols or nil. 
*/ static VALUE parse(int argc, VALUE *argv, VALUE self) { VALUE string; - VALUE filepath; - rb_scan_args(argc, argv, "11", &string, &filepath); + VALUE keywords; + rb_scan_args(argc, argv, "1:", &string, &keywords); + + pm_options_t options = { 0 }; + extract_options(&options, Qnil, keywords); pm_string_t input; input_load_string(&input, string); @@ -482,38 +645,41 @@ parse(int argc, VALUE *argv, VALUE self) { pm_string_constant_init(&input, dup, length); #endif - pm_options_t options = { 0 }; - pm_options_filepath_set(&options, check_string(filepath)); - VALUE value = parse_input(&input, &options); - pm_options_free(&options); #ifdef PRISM_DEBUG_MODE_BUILD free(dup); #endif + pm_options_free(&options); return value; } /** * call-seq: - * Prism::parse_file(filepath) -> ParseResult + * Prism::parse_file(filepath, **options) -> ParseResult * - * Parse the given file and return a ParseResult instance. + * Parse the given file and return a ParseResult instance. For supported + * options, see Prism::parse. 
*/ static VALUE -parse_file(VALUE self, VALUE filepath) { - pm_string_t input; - - const char *checked = check_string(filepath); - if (!pm_string_mapped_init(&input, checked)) return Qnil; +parse_file(int argc, VALUE *argv, VALUE self) { + VALUE filepath; + VALUE keywords; + rb_scan_args(argc, argv, "1:", &filepath, &keywords); pm_options_t options = { 0 }; - pm_options_filepath_set(&options, checked); + extract_options(&options, filepath, keywords); + + pm_string_t input; + if (!pm_string_mapped_init(&input, options.filepath)) { + pm_options_free(&options); + return Qnil; + } VALUE value = parse_input(&input, &options); - pm_options_free(&options); pm_string_free(&input); + pm_options_free(&options); return value; } @@ -540,22 +706,23 @@ parse_input_comments(pm_string_t *input, const pm_options_t *options) { /** * call-seq: - * Prism::parse_comments(source, filepath = nil) -> Array + * Prism::parse_comments(source, **options) -> Array * - * Parse the given string and return an array of Comment objects. + * Parse the given string and return an array of Comment objects. For supported + * options, see Prism::parse. */ static VALUE parse_comments(int argc, VALUE *argv, VALUE self) { VALUE string; - VALUE filepath; - rb_scan_args(argc, argv, "11", &string, &filepath); + VALUE keywords; + rb_scan_args(argc, argv, "1:", &string, &keywords); + + pm_options_t options = { 0 }; + extract_options(&options, Qnil, keywords); pm_string_t input; input_load_string(&input, string); - pm_options_t options = { 0 }; - pm_options_filepath_set(&options, check_string(filepath)); - VALUE result = parse_input_comments(&input, &options); pm_options_free(&options); @@ -564,31 +731,36 @@ parse_comments(int argc, VALUE *argv, VALUE self) { /** * call-seq: - * Prism::parse_file_comments(filepath) -> Array + * Prism::parse_file_comments(filepath, **options) -> Array * - * Parse the given file and return an array of Comment objects. + * Parse the given file and return an array of Comment objects. 
For supported + * options, see Prism::parse. */ static VALUE -parse_file_comments(VALUE self, VALUE filepath) { - pm_string_t input; - - const char *checked = check_string(filepath); - if (!pm_string_mapped_init(&input, checked)) return Qnil; +parse_file_comments(int argc, VALUE *argv, VALUE self) { + VALUE filepath; + VALUE keywords; + rb_scan_args(argc, argv, "1:", &filepath, &keywords); pm_options_t options = { 0 }; - pm_options_filepath_set(&options, checked); + extract_options(&options, filepath, keywords); + + pm_string_t input; + if (!pm_string_mapped_init(&input, options.filepath)) { + pm_options_free(&options); + return Qnil; + } VALUE value = parse_input_comments(&input, &options); - - pm_options_free(&options); pm_string_free(&input); + pm_options_free(&options); return value; } /** * call-seq: - * Prism::parse_lex(source, filepath = nil) -> ParseResult + * Prism::parse_lex(source, **options) -> ParseResult * * Parse the given string and return a ParseResult instance that contains a * 2-element array, where the first element is the AST and the second element is @@ -597,30 +769,31 @@ parse_file_comments(VALUE self, VALUE filepath) { * This API is only meant to be used in the case where you need both the AST and * the tokens. If you only need one or the other, use either Prism::parse or * Prism::lex. + * + * For supported options, see Prism::parse. 
*/ static VALUE parse_lex(int argc, VALUE *argv, VALUE self) { VALUE string; - VALUE filepath; - rb_scan_args(argc, argv, "11", &string, &filepath); + VALUE keywords; + rb_scan_args(argc, argv, "1:", &string, &keywords); + + pm_options_t options = { 0 }; + extract_options(&options, Qnil, keywords); pm_string_t input; input_load_string(&input, string); - pm_options_t options = { 0 }; - pm_options_filepath_set(&options, check_string(filepath)); - VALUE value = parse_lex_input(&input, &options, true); - - pm_options_free(&options); pm_string_free(&input); + pm_options_free(&options); return value; } /** * call-seq: - * Prism::parse_lex_file(filepath) -> ParseResult + * Prism::parse_lex_file(filepath, **options) -> ParseResult * * Parse the given file and return a ParseResult instance that contains a * 2-element array, where the first element is the AST and the second element is @@ -629,21 +802,27 @@ parse_lex(int argc, VALUE *argv, VALUE self) { * This API is only meant to be used in the case where you need both the AST and * the tokens. If you only need one or the other, use either Prism::parse_file * or Prism::lex_file. + * + * For supported options, see Prism::parse. 
*/ static VALUE -parse_lex_file(VALUE self, VALUE filepath) { - pm_string_t input; - - const char *checked = check_string(filepath); - if (!pm_string_mapped_init(&input, checked)) return Qnil; +parse_lex_file(int argc, VALUE *argv, VALUE self) { + VALUE filepath; + VALUE keywords; + rb_scan_args(argc, argv, "1:", &filepath, &keywords); pm_options_t options = { 0 }; - pm_options_filepath_set(&options, checked); + extract_options(&options, filepath, keywords); + + pm_string_t input; + if (!pm_string_mapped_init(&input, options.filepath)) { + pm_options_free(&options); + return Qnil; + } VALUE value = parse_lex_input(&input, &options, true); - - pm_options_free(&options); pm_string_free(&input); + pm_options_free(&options); return value; } @@ -821,6 +1000,15 @@ Init_prism(void) { rb_cPrismParseWarning = rb_define_class_under(rb_cPrism, "ParseWarning", rb_cObject); rb_cPrismParseResult = rb_define_class_under(rb_cPrism, "ParseResult", rb_cObject); + // Intern all of the options that we support so that we don't have to do it + // every time we parse. + id_filepath = rb_intern_const("filepath"); + id_encoding = rb_intern_const("encoding"); + id_line = rb_intern_const("line"); + id_frozen_string_literal = rb_intern_const("frozen_string_literal"); + id_suppress_warnings = rb_intern_const("suppress_warnings"); + id_scopes = rb_intern_const("scopes"); + /** * The version of the prism library. */ @@ -835,15 +1023,15 @@ Init_prism(void) { // First, the functions that have to do with lexing and parsing. 
rb_define_singleton_method(rb_cPrism, "dump", dump, -1); - rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, 1); + rb_define_singleton_method(rb_cPrism, "dump_file", dump_file, -1); rb_define_singleton_method(rb_cPrism, "lex", lex, -1); - rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, 1); + rb_define_singleton_method(rb_cPrism, "lex_file", lex_file, -1); rb_define_singleton_method(rb_cPrism, "parse", parse, -1); - rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, 1); + rb_define_singleton_method(rb_cPrism, "parse_file", parse_file, -1); rb_define_singleton_method(rb_cPrism, "parse_comments", parse_comments, -1); - rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, 1); + rb_define_singleton_method(rb_cPrism, "parse_file_comments", parse_file_comments, -1); rb_define_singleton_method(rb_cPrism, "parse_lex", parse_lex, -1); - rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, 1); + rb_define_singleton_method(rb_cPrism, "parse_lex_file", parse_lex_file, -1); // Next, the functions that will be called by the parser to perform various // internal tasks. We expose these to make them easier to test. diff --git a/test/prism/parse_test.rb b/test/prism/parse_test.rb index eada2952df..6bd7a5d2a1 100644 --- a/test/prism/parse_test.rb +++ b/test/prism/parse_test.rb @@ -41,7 +41,7 @@ module Prism def test_parse_takes_file_path filepath = "filepath.rb" - result = Prism.parse("def foo; __FILE__; end", filepath) + result = Prism.parse("def foo; __FILE__; end", filepath: filepath) assert_equal filepath, find_source_file_node(result.value).filepath end @@ -122,7 +122,7 @@ module Prism end # Next, assert that there were no errors during parsing. - result = Prism.parse(source, relative) + result = Prism.parse(source, filepath: relative) assert_empty result.errors # Next, pretty print the source. 
@@ -149,7 +149,7 @@ module Prism # Next, assert that the value can be serialized and deserialized without # changing the shape of the tree. - assert_equal_nodes(result.value, Prism.load(source, Prism.dump(source, relative)).value) + assert_equal_nodes(result.value, Prism.load(source, Prism.dump(source, filepath: relative)).value) # Next, check that the location ranges of each node in the tree are a # superset of their respective child nodes. @@ -203,10 +203,10 @@ module Prism file_contents.split(/(?<=\S)\n\n(?=\S)/).each do |snippet| snippet = snippet.rstrip - result = Prism.parse(snippet, relative) + result = Prism.parse(snippet, filepath: relative) assert_empty result.errors - assert_equal_nodes(result.value, Prism.load(snippet, Prism.dump(snippet, relative)).value) + assert_equal_nodes(result.value, Prism.load(snippet, Prism.dump(snippet, filepath: relative)).value) end end end diff --git a/test/prism/ruby_api_test.rb b/test/prism/ruby_api_test.rb index a6ce976a85..efe8bc1c1b 100644 --- a/test/prism/ruby_api_test.rb +++ b/test/prism/ruby_api_test.rb @@ -8,12 +8,12 @@ module Prism filepath = __FILE__ source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8) - assert_equal Prism.lex(source, filepath).value, Prism.lex_file(filepath).value - assert_equal Prism.dump(source, filepath), Prism.dump_file(filepath) + assert_equal Prism.lex(source, filepath: filepath).value, Prism.lex_file(filepath).value + assert_equal Prism.dump(source, filepath: filepath), Prism.dump_file(filepath) - serialized = Prism.dump(source, filepath) + serialized = Prism.dump(source, filepath: filepath) ast1 = Prism.load(source, serialized).value - ast2 = Prism.parse(source, filepath).value + ast2 = Prism.parse(source, filepath: filepath).value ast3 = Prism.parse_file(filepath).value assert_equal_nodes ast1, ast2