[ruby/prism] Provide ability to lock encoding while parsing
https://github.com/ruby/prism/commit/f7faedfb3f
This commit is contained in:
parent
0d5eea9957
commit
d827d32527
@ -431,6 +431,9 @@ module Prism
|
||||
template << "C"
|
||||
values << { nil => 0, "3.3.0" => 1, "3.3.1" => 1, "3.4.0" => 0, "latest" => 0 }.fetch(options[:version])
|
||||
|
||||
template << "C"
|
||||
values << (options[:encoding] == false ? 1 : 0)
|
||||
|
||||
template << "L"
|
||||
if (scopes = options[:scopes])
|
||||
values << scopes.length
|
||||
|
@ -51,7 +51,7 @@ module Prism
|
||||
source = source_buffer.source
|
||||
|
||||
offset_cache = build_offset_cache(source)
|
||||
result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]]), offset_cache)
|
||||
result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]], encoding: false), offset_cache)
|
||||
|
||||
build_ast(result.value, offset_cache)
|
||||
ensure
|
||||
@ -64,7 +64,7 @@ module Prism
|
||||
source = source_buffer.source
|
||||
|
||||
offset_cache = build_offset_cache(source)
|
||||
result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]]), offset_cache)
|
||||
result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]], encoding: false), offset_cache)
|
||||
|
||||
[
|
||||
build_ast(result.value, offset_cache),
|
||||
@ -83,7 +83,7 @@ module Prism
|
||||
offset_cache = build_offset_cache(source)
|
||||
result =
|
||||
begin
|
||||
unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]]), offset_cache)
|
||||
unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]], encoding: false), offset_cache)
|
||||
rescue ::Parser::SyntaxError
|
||||
raise if !recover
|
||||
end
|
||||
|
@ -138,7 +138,13 @@ build_options_i(VALUE key, VALUE value, VALUE argument) {
|
||||
if (key_id == rb_id_option_filepath) {
|
||||
if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value));
|
||||
} else if (key_id == rb_id_option_encoding) {
|
||||
if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
|
||||
if (!NIL_P(value)) {
|
||||
if (value == Qfalse) {
|
||||
pm_options_encoding_locked_set(options, true);
|
||||
} else {
|
||||
pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
|
||||
}
|
||||
}
|
||||
} else if (key_id == rb_id_option_line) {
|
||||
if (!NIL_P(value)) pm_options_line_set(options, NUM2INT(value));
|
||||
} else if (key_id == rb_id_option_frozen_string_literal) {
|
||||
@ -206,6 +212,7 @@ build_options(VALUE argument) {
|
||||
static void
|
||||
extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
|
||||
options->line = 1; // default
|
||||
|
||||
if (!NIL_P(keywords)) {
|
||||
struct build_options_data data = { .options = options, .keywords = keywords };
|
||||
struct build_options_data *argument = &data;
|
||||
|
@ -16,6 +16,14 @@ pm_options_encoding_set(pm_options_t *options, const char *encoding) {
|
||||
pm_string_constant_init(&options->encoding, encoding, strlen(encoding));
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the encoding_locked option on the given options struct.
|
||||
*/
|
||||
PRISM_EXPORTED_FUNCTION void
|
||||
pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked) {
|
||||
options->encoding_locked = encoding_locked;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set the line option on the given options struct.
|
||||
*/
|
||||
@ -215,6 +223,7 @@ pm_options_read(pm_options_t *options, const char *data) {
|
||||
options->frozen_string_literal = (int8_t) *data++;
|
||||
options->command_line = (uint8_t) *data++;
|
||||
options->version = (pm_options_version_t) *data++;
|
||||
options->encoding_locked = ((uint8_t) *data++) > 0;
|
||||
|
||||
uint32_t scopes_count = pm_options_read_u32(data);
|
||||
data += 4;
|
||||
|
@ -103,6 +103,13 @@ typedef struct {
|
||||
* - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
|
||||
*/
|
||||
int8_t frozen_string_literal;
|
||||
|
||||
/**
|
||||
* Whether or not the encoding magic comments should be ignored. This is a
|
||||
* niche use-case where you want to parse a file with a specific encoding
|
||||
* but ignore any encoding magic comments at the top of the file.
|
||||
*/
|
||||
bool encoding_locked;
|
||||
} pm_options_t;
|
||||
|
||||
/**
|
||||
@ -166,6 +173,14 @@ PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t
|
||||
*/
|
||||
PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding);
|
||||
|
||||
/**
|
||||
* Set the encoding_locked option on the given options struct.
|
||||
*
|
||||
* @param options The options struct to set the encoding_locked value on.
|
||||
* @param encoding_locked The encoding_locked value to set.
|
||||
*/
|
||||
PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked);
|
||||
|
||||
/**
|
||||
* Set the frozen string literal option on the given options struct.
|
||||
*
|
||||
|
@ -860,6 +860,14 @@ struct pm_parser {
|
||||
/** Whether or not we're currently recovering from a syntax error. */
|
||||
bool recovering;
|
||||
|
||||
/**
|
||||
* This is very specialized behavior for when you want to parse in a context
|
||||
* that does not respect encoding comments. Its main use case is translating
|
||||
* into the whitequark/parser AST which re-encodes source files in UTF-8
|
||||
* before they are parsed and ignores encoding comments.
|
||||
*/
|
||||
bool encoding_locked;
|
||||
|
||||
/**
|
||||
* Whether or not the encoding has been changed by a magic comment. We use
|
||||
* this to provide a fast path for the lexer instead of going through the
|
||||
|
@ -8261,7 +8261,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
|
||||
|
||||
// We only want to attempt to compare against encoding comments if it's
|
||||
// the first line in the file (or the second in the case of a shebang).
|
||||
if (parser->current.start == parser->encoding_comment_start) {
|
||||
if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
|
||||
if (
|
||||
(key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
|
||||
(key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
|
||||
@ -10438,7 +10438,9 @@ parser_lex(pm_parser_t *parser) {
|
||||
// pass and we're at the start of the file, then we need
|
||||
// to do another pass to potentially find other patterns
|
||||
// for encoding comments.
|
||||
if (length >= 10) parser_lex_magic_comment_encoding(parser);
|
||||
if (length >= 10 && !parser->encoding_locked) {
|
||||
parser_lex_magic_comment_encoding(parser);
|
||||
}
|
||||
}
|
||||
|
||||
lexed_comment = true;
|
||||
@ -21244,6 +21246,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
||||
.parsing_eval = false,
|
||||
.command_start = true,
|
||||
.recovering = false,
|
||||
.encoding_locked = false,
|
||||
.encoding_changed = false,
|
||||
.pattern_matching_newlines = false,
|
||||
.in_keyword_arg = false,
|
||||
@ -21291,6 +21294,9 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
||||
parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
|
||||
}
|
||||
|
||||
// encoding_locked option
|
||||
parser->encoding_locked = options->encoding_locked;
|
||||
|
||||
// frozen_string_literal option
|
||||
parser->frozen_string_literal = options->frozen_string_literal;
|
||||
|
||||
|
@ -60,7 +60,6 @@ module Prism
|
||||
# skip them for now.
|
||||
skip_all = skip_incorrect | [
|
||||
"regex.txt",
|
||||
"regex_char_width.txt",
|
||||
"unescaping.txt",
|
||||
"seattlerb/bug190.txt",
|
||||
"seattlerb/heredoc_with_extra_carriage_returns_windows.txt",
|
||||
|
Loading…
x
Reference in New Issue
Block a user