Respect the encoding of the source [Bug #18827]
Do not override the encoding of the input string at preparation time, because the source encoding has not yet been determined from the input at that point.
This commit is contained in:
parent
278fefb962
commit
cd5cafa4a3
Notes:
git
2022-06-17 01:49:16 +09:00
26
parse.y
26
parse.y
@ -6462,12 +6462,6 @@ lex_getline(struct parser_params *p)
|
|||||||
if (NIL_P(line)) return line;
|
if (NIL_P(line)) return line;
|
||||||
must_be_ascii_compatible(line);
|
must_be_ascii_compatible(line);
|
||||||
if (RB_OBJ_FROZEN(line)) line = rb_str_dup(line); // needed for RubyVM::AST.of because script_lines in iseq is deep-frozen
|
if (RB_OBJ_FROZEN(line)) line = rb_str_dup(line); // needed for RubyVM::AST.of because script_lines in iseq is deep-frozen
|
||||||
#ifndef RIPPER
|
|
||||||
if (p->debug_lines) {
|
|
||||||
rb_enc_associate(line, p->enc);
|
|
||||||
rb_ary_push(p->debug_lines, line);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
p->line_count++;
|
p->line_count++;
|
||||||
return line;
|
return line;
|
||||||
}
|
}
|
||||||
@ -6614,7 +6608,7 @@ add_delayed_token(struct parser_params *p, const char *tok, const char *end)
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
static int
|
static int
|
||||||
nextline(struct parser_params *p)
|
nextline(struct parser_params *p, int set_encoding)
|
||||||
{
|
{
|
||||||
VALUE v = p->lex.nextline;
|
VALUE v = p->lex.nextline;
|
||||||
p->lex.nextline = 0;
|
p->lex.nextline = 0;
|
||||||
@ -6632,6 +6626,12 @@ nextline(struct parser_params *p)
|
|||||||
lex_goto_eol(p);
|
lex_goto_eol(p);
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
#ifndef RIPPER
|
||||||
|
if (p->debug_lines) {
|
||||||
|
if (set_encoding) rb_enc_associate(v, p->enc);
|
||||||
|
rb_ary_push(p->debug_lines, v);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
p->cr_seen = FALSE;
|
p->cr_seen = FALSE;
|
||||||
}
|
}
|
||||||
else if (NIL_P(v)) {
|
else if (NIL_P(v)) {
|
||||||
@ -6663,12 +6663,12 @@ parser_cr(struct parser_params *p, int c)
|
|||||||
}
|
}
|
||||||
|
|
||||||
static inline int
|
static inline int
|
||||||
nextc(struct parser_params *p)
|
nextc0(struct parser_params *p, int set_encoding)
|
||||||
{
|
{
|
||||||
int c;
|
int c;
|
||||||
|
|
||||||
if (UNLIKELY((p->lex.pcur == p->lex.pend) || p->eofp || RTEST(p->lex.nextline))) {
|
if (UNLIKELY((p->lex.pcur == p->lex.pend) || p->eofp || RTEST(p->lex.nextline))) {
|
||||||
if (nextline(p)) return -1;
|
if (nextline(p, set_encoding)) return -1;
|
||||||
}
|
}
|
||||||
c = (unsigned char)*p->lex.pcur++;
|
c = (unsigned char)*p->lex.pcur++;
|
||||||
if (UNLIKELY(c == '\r')) {
|
if (UNLIKELY(c == '\r')) {
|
||||||
@ -6677,6 +6677,7 @@ nextc(struct parser_params *p)
|
|||||||
|
|
||||||
return c;
|
return c;
|
||||||
}
|
}
|
||||||
|
#define nextc(p) nextc0(p, TRUE)
|
||||||
|
|
||||||
static void
|
static void
|
||||||
pushback(struct parser_params *p, int c)
|
pushback(struct parser_params *p, int c)
|
||||||
@ -8467,7 +8468,7 @@ set_file_encoding(struct parser_params *p, const char *str, const char *send)
|
|||||||
static void
|
static void
|
||||||
parser_prepare(struct parser_params *p)
|
parser_prepare(struct parser_params *p)
|
||||||
{
|
{
|
||||||
int c = nextc(p);
|
int c = nextc0(p, FALSE);
|
||||||
p->token_info_enabled = !compile_for_eval && RTEST(ruby_verbose);
|
p->token_info_enabled = !compile_for_eval && RTEST(ruby_verbose);
|
||||||
switch (c) {
|
switch (c) {
|
||||||
case '#':
|
case '#':
|
||||||
@ -8479,6 +8480,11 @@ parser_prepare(struct parser_params *p)
|
|||||||
(unsigned char)p->lex.pcur[1] == 0xbf) {
|
(unsigned char)p->lex.pcur[1] == 0xbf) {
|
||||||
p->enc = rb_utf8_encoding();
|
p->enc = rb_utf8_encoding();
|
||||||
p->lex.pcur += 2;
|
p->lex.pcur += 2;
|
||||||
|
#ifndef RIPPER
|
||||||
|
if (p->debug_lines) {
|
||||||
|
rb_enc_associate(p->lex.lastline, p->enc);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
p->lex.pbeg = p->lex.pcur;
|
p->lex.pbeg = p->lex.pcur;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -542,6 +542,19 @@ dummy
|
|||||||
assert_equal("def test_keep_script_lines_for_of\n", node_method.source.lines.first)
|
assert_equal("def test_keep_script_lines_for_of\n", node_method.source.lines.first)
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_encoding_with_keep_script_lines
|
||||||
|
enc = Encoding::EUC_JP
|
||||||
|
code = "__ENCODING__".encode(enc)
|
||||||
|
|
||||||
|
assert_equal(enc, eval(code))
|
||||||
|
|
||||||
|
node = RubyVM::AbstractSyntaxTree.parse(code, keep_script_lines: false)
|
||||||
|
assert_equal(enc, node.children[2].children[0])
|
||||||
|
|
||||||
|
node = RubyVM::AbstractSyntaxTree.parse(code, keep_script_lines: true)
|
||||||
|
assert_equal(enc, node.children[2].children[0])
|
||||||
|
end
|
||||||
|
|
||||||
def test_e_option
|
def test_e_option
|
||||||
assert_in_out_err(["-e", "def foo; end; pp RubyVM::AbstractSyntaxTree.of(method(:foo)).type"],
|
assert_in_out_err(["-e", "def foo; end; pp RubyVM::AbstractSyntaxTree.of(method(:foo)).type"],
|
||||||
"", [":SCOPE"], [])
|
"", [":SCOPE"], [])
|
||||||
|
@ -66,6 +66,15 @@ class TestSyntax < Test::Unit::TestCase
|
|||||||
f&.close!
|
f&.close!
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def test_script_lines_encoding
|
||||||
|
require 'tmpdir'
|
||||||
|
Dir.mktmpdir do |dir|
|
||||||
|
File.write(File.join(dir, "script_lines.rb"), "SCRIPT_LINES__ = {}\n")
|
||||||
|
assert_in_out_err(%w"-r./script_lines -w -Ke", "puts __ENCODING__.name",
|
||||||
|
%w"EUC-JP", /-K is specified/, chdir: dir)
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
def test_anonymous_block_forwarding
|
def test_anonymous_block_forwarding
|
||||||
assert_syntax_error("def b; c(&); end", /no anonymous block parameter/)
|
assert_syntax_error("def b; c(&); end", /no anonymous block parameter/)
|
||||||
assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")
|
assert_separately([], "#{<<-"begin;"}\n#{<<-'end;'}")
|
||||||
|
Loading…
x
Reference in New Issue
Block a user