[ruby/prism] Provide ability to lock encoding while parsing
https://github.com/ruby/prism/commit/f7faedfb3f
This commit is contained in:
parent
0d5eea9957
commit
d827d32527
@ -431,6 +431,9 @@ module Prism
|
|||||||
template << "C"
|
template << "C"
|
||||||
values << { nil => 0, "3.3.0" => 1, "3.3.1" => 1, "3.4.0" => 0, "latest" => 0 }.fetch(options[:version])
|
values << { nil => 0, "3.3.0" => 1, "3.3.1" => 1, "3.4.0" => 0, "latest" => 0 }.fetch(options[:version])
|
||||||
|
|
||||||
|
template << "C"
|
||||||
|
values << (options[:encoding] == false ? 1 : 0)
|
||||||
|
|
||||||
template << "L"
|
template << "L"
|
||||||
if (scopes = options[:scopes])
|
if (scopes = options[:scopes])
|
||||||
values << scopes.length
|
values << scopes.length
|
||||||
|
@ -51,7 +51,7 @@ module Prism
|
|||||||
source = source_buffer.source
|
source = source_buffer.source
|
||||||
|
|
||||||
offset_cache = build_offset_cache(source)
|
offset_cache = build_offset_cache(source)
|
||||||
result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]]), offset_cache)
|
result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]], encoding: false), offset_cache)
|
||||||
|
|
||||||
build_ast(result.value, offset_cache)
|
build_ast(result.value, offset_cache)
|
||||||
ensure
|
ensure
|
||||||
@ -64,7 +64,7 @@ module Prism
|
|||||||
source = source_buffer.source
|
source = source_buffer.source
|
||||||
|
|
||||||
offset_cache = build_offset_cache(source)
|
offset_cache = build_offset_cache(source)
|
||||||
result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]]), offset_cache)
|
result = unwrap(Prism.parse(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]], encoding: false), offset_cache)
|
||||||
|
|
||||||
[
|
[
|
||||||
build_ast(result.value, offset_cache),
|
build_ast(result.value, offset_cache),
|
||||||
@ -83,7 +83,7 @@ module Prism
|
|||||||
offset_cache = build_offset_cache(source)
|
offset_cache = build_offset_cache(source)
|
||||||
result =
|
result =
|
||||||
begin
|
begin
|
||||||
unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]]), offset_cache)
|
unwrap(Prism.parse_lex(source, filepath: source_buffer.name, version: convert_for_prism(version), scopes: [[]], encoding: false), offset_cache)
|
||||||
rescue ::Parser::SyntaxError
|
rescue ::Parser::SyntaxError
|
||||||
raise if !recover
|
raise if !recover
|
||||||
end
|
end
|
||||||
|
@ -138,7 +138,13 @@ build_options_i(VALUE key, VALUE value, VALUE argument) {
|
|||||||
if (key_id == rb_id_option_filepath) {
|
if (key_id == rb_id_option_filepath) {
|
||||||
if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value));
|
if (!NIL_P(value)) pm_options_filepath_set(options, check_string(value));
|
||||||
} else if (key_id == rb_id_option_encoding) {
|
} else if (key_id == rb_id_option_encoding) {
|
||||||
if (!NIL_P(value)) pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
|
if (!NIL_P(value)) {
|
||||||
|
if (value == Qfalse) {
|
||||||
|
pm_options_encoding_locked_set(options, true);
|
||||||
|
} else {
|
||||||
|
pm_options_encoding_set(options, rb_enc_name(rb_to_encoding(value)));
|
||||||
|
}
|
||||||
|
}
|
||||||
} else if (key_id == rb_id_option_line) {
|
} else if (key_id == rb_id_option_line) {
|
||||||
if (!NIL_P(value)) pm_options_line_set(options, NUM2INT(value));
|
if (!NIL_P(value)) pm_options_line_set(options, NUM2INT(value));
|
||||||
} else if (key_id == rb_id_option_frozen_string_literal) {
|
} else if (key_id == rb_id_option_frozen_string_literal) {
|
||||||
@ -206,6 +212,7 @@ build_options(VALUE argument) {
|
|||||||
static void
|
static void
|
||||||
extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
|
extract_options(pm_options_t *options, VALUE filepath, VALUE keywords) {
|
||||||
options->line = 1; // default
|
options->line = 1; // default
|
||||||
|
|
||||||
if (!NIL_P(keywords)) {
|
if (!NIL_P(keywords)) {
|
||||||
struct build_options_data data = { .options = options, .keywords = keywords };
|
struct build_options_data data = { .options = options, .keywords = keywords };
|
||||||
struct build_options_data *argument = &data;
|
struct build_options_data *argument = &data;
|
||||||
|
@ -16,6 +16,14 @@ pm_options_encoding_set(pm_options_t *options, const char *encoding) {
|
|||||||
pm_string_constant_init(&options->encoding, encoding, strlen(encoding));
|
pm_string_constant_init(&options->encoding, encoding, strlen(encoding));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the encoding_locked option on the given options struct. The flag is
* stored verbatim in options->encoding_locked. When it is true the parser
* skips its encoding magic comment handling, so a comment in the source
* cannot switch the encoding being used to parse it.
|
||||||
|
*/
|
||||||
|
PRISM_EXPORTED_FUNCTION void
|
||||||
|
pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked) {
|
||||||
|
options->encoding_locked = encoding_locked;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the line option on the given options struct.
|
* Set the line option on the given options struct.
|
||||||
*/
|
*/
|
||||||
@ -215,6 +223,7 @@ pm_options_read(pm_options_t *options, const char *data) {
|
|||||||
options->frozen_string_literal = (int8_t) *data++;
|
options->frozen_string_literal = (int8_t) *data++;
|
||||||
options->command_line = (uint8_t) *data++;
|
options->command_line = (uint8_t) *data++;
|
||||||
options->version = (pm_options_version_t) *data++;
|
options->version = (pm_options_version_t) *data++;
|
||||||
|
options->encoding_locked = ((uint8_t) *data++) > 0;
|
||||||
|
|
||||||
uint32_t scopes_count = pm_options_read_u32(data);
|
uint32_t scopes_count = pm_options_read_u32(data);
|
||||||
data += 4;
|
data += 4;
|
||||||
|
@ -103,6 +103,13 @@ typedef struct {
|
|||||||
* - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
|
* - PM_OPTIONS_FROZEN_STRING_LITERAL_UNSET
|
||||||
*/
|
*/
|
||||||
int8_t frozen_string_literal;
|
int8_t frozen_string_literal;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Whether or not the encoding is locked. When true, encoding magic comments
* are ignored instead of being respected. This is a
|
||||||
|
* niche use-case where you want to parse a file with a specific encoding
|
||||||
|
* but ignore any encoding magic comments at the top of the file.
|
||||||
|
*/
|
||||||
|
bool encoding_locked;
|
||||||
} pm_options_t;
|
} pm_options_t;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@ -166,6 +173,14 @@ PRISM_EXPORTED_FUNCTION void pm_options_line_set(pm_options_t *options, int32_t
|
|||||||
*/
|
*/
|
||||||
PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding);
|
PRISM_EXPORTED_FUNCTION void pm_options_encoding_set(pm_options_t *options, const char *encoding);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Set the encoding_locked option on the given options struct.
|
||||||
|
*
|
||||||
|
* @param options The options struct to set the encoding_locked value on.
|
||||||
|
* @param encoding_locked The encoding_locked value to set.
|
||||||
|
*/
|
||||||
|
PRISM_EXPORTED_FUNCTION void pm_options_encoding_locked_set(pm_options_t *options, bool encoding_locked);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Set the frozen string literal option on the given options struct.
|
* Set the frozen string literal option on the given options struct.
|
||||||
*
|
*
|
||||||
|
@ -860,6 +860,14 @@ struct pm_parser {
|
|||||||
/** Whether or not we're currently recovering from a syntax error. */
|
/** Whether or not we're currently recovering from a syntax error. */
|
||||||
bool recovering;
|
bool recovering;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is very specialized behavior for when you want to parse in a context
|
||||||
|
* that does not respect encoding comments. Its main use case is translating
|
||||||
|
* into the whitequark/parser AST which re-encodes source files in UTF-8
|
||||||
|
* before they are parsed and ignores encoding comments.
|
||||||
|
*/
|
||||||
|
bool encoding_locked;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Whether or not the encoding has been changed by a magic comment. We use
|
* Whether or not the encoding has been changed by a magic comment. We use
|
||||||
* this to provide a fast path for the lexer instead of going through the
|
* this to provide a fast path for the lexer instead of going through the
|
||||||
|
@ -8261,7 +8261,7 @@ parser_lex_magic_comment(pm_parser_t *parser, bool semantic_token_seen) {
|
|||||||
|
|
||||||
// We only want to attempt to compare against encoding comments if it's
|
// We only want to attempt to compare against encoding comments if it's
|
||||||
// the first line in the file (or the second in the case of a shebang).
|
// the first line in the file (or the second in the case of a shebang).
|
||||||
if (parser->current.start == parser->encoding_comment_start) {
|
if (parser->current.start == parser->encoding_comment_start && !parser->encoding_locked) {
|
||||||
if (
|
if (
|
||||||
(key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
|
(key_length == 8 && pm_strncasecmp(key_source, (const uint8_t *) "encoding", 8) == 0) ||
|
||||||
(key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
|
(key_length == 6 && pm_strncasecmp(key_source, (const uint8_t *) "coding", 6) == 0)
|
||||||
@ -10438,7 +10438,9 @@ parser_lex(pm_parser_t *parser) {
|
|||||||
// pass and we're at the start of the file, then we need
|
// pass and we're at the start of the file, then we need
|
||||||
// to do another pass to potentially find other patterns
|
// to do another pass to potentially find other patterns
|
||||||
// for encoding comments.
|
// for encoding comments.
|
||||||
if (length >= 10) parser_lex_magic_comment_encoding(parser);
|
if (length >= 10 && !parser->encoding_locked) {
|
||||||
|
parser_lex_magic_comment_encoding(parser);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
lexed_comment = true;
|
lexed_comment = true;
|
||||||
@ -21244,6 +21246,7 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|||||||
.parsing_eval = false,
|
.parsing_eval = false,
|
||||||
.command_start = true,
|
.command_start = true,
|
||||||
.recovering = false,
|
.recovering = false,
|
||||||
|
.encoding_locked = false,
|
||||||
.encoding_changed = false,
|
.encoding_changed = false,
|
||||||
.pattern_matching_newlines = false,
|
.pattern_matching_newlines = false,
|
||||||
.in_keyword_arg = false,
|
.in_keyword_arg = false,
|
||||||
@ -21291,6 +21294,9 @@ pm_parser_init(pm_parser_t *parser, const uint8_t *source, size_t size, const pm
|
|||||||
parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
|
parser_lex_magic_comment_encoding_value(parser, encoding_source, encoding_source + encoding_length);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// encoding_locked option
|
||||||
|
parser->encoding_locked = options->encoding_locked;
|
||||||
|
|
||||||
// frozen_string_literal option
|
// frozen_string_literal option
|
||||||
parser->frozen_string_literal = options->frozen_string_literal;
|
parser->frozen_string_literal = options->frozen_string_literal;
|
||||||
|
|
||||||
|
@ -60,7 +60,6 @@ module Prism
|
|||||||
# skip them for now.
|
# skip them for now.
|
||||||
skip_all = skip_incorrect | [
|
skip_all = skip_incorrect | [
|
||||||
"regex.txt",
|
"regex.txt",
|
||||||
"regex_char_width.txt",
|
|
||||||
"unescaping.txt",
|
"unescaping.txt",
|
||||||
"seattlerb/bug190.txt",
|
"seattlerb/bug190.txt",
|
||||||
"seattlerb/heredoc_with_extra_carriage_returns_windows.txt",
|
"seattlerb/heredoc_with_extra_carriage_returns_windows.txt",
|
||||||
|
Loading…
x
Reference in New Issue
Block a user