parse.y: indented heredoc

* parse.y: add heredoc <<~ syntax.  [Feature #9098]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@52916 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
nobu 2015-12-07 14:39:52 +00:00
parent 9f51e95fc1
commit 9a28a29b87
8 changed files with 412 additions and 12 deletions

View File

@ -1,3 +1,7 @@
Mon Dec 7 23:39:49 2015 Ben Miller <bjmllr@gmail.com>
* parse.y: add heredoc <<~ syntax. [Feature #9098]
Mon Dec 7 23:06:16 2015 Kazuhiro NISHIYAMA <zn@mbf.nifty.com> Mon Dec 7 23:06:16 2015 Kazuhiro NISHIYAMA <zn@mbf.nifty.com>
* prelude.rb (IO#read_nonblock): [DOC] add missing options to * prelude.rb (IO#read_nonblock): [DOC] add missing options to

View File

@ -196,6 +196,20 @@ Note that while the closing identifier may be indented, the content is
always treated as if it is flush left. If you indent the content those spaces always treated as if it is flush left. If you indent the content those spaces
will appear in the output. will appear in the output.
To have indented content as well as an indented closing identifier, you can use
a "squiggly" heredoc, which uses a "~" instead of a "-" after <tt><<</tt>:
expected_result = <<~SQUIGGLY_HEREDOC
This would contain specially formatted text.
That might span many lines
SQUIGGLY_HEREDOC
The indentation of the least-indented line will be removed from each line of
the content. Note that empty lines and lines consisting solely of literal tabs
and spaces will be ignored for the purposes of determining indentation, but
escaped tabs and spaces are considered non-indentation characters.
A heredoc allows interpolation and escaped characters. You may disable A heredoc allows interpolation and escaped characters. You may disable
interpolation and escaping by surrounding the opening identifier with single interpolation and escaping by surrounding the opening identifier with single
quotes: quotes:

View File

@ -44,28 +44,56 @@ class Ripper
end end
class Lexer < ::Ripper #:nodoc: internal use only class Lexer < ::Ripper #:nodoc: internal use only
Elem = Struct.new(:pos, :event, :tok)
def tokenize def tokenize
lex().map {|pos, event, tok| tok } parse().sort_by(&:pos).map(&:tok)
end end
def lex def lex
parse().sort_by {|pos, event, tok| pos } parse().sort_by(&:pos).map(&:to_a)
end end
def parse def parse
@buf = [] @buf = []
@stack = []
super super
@buf.flatten!
@buf @buf
end end
private private
def _push_token(tok) def on_heredoc_dedent(v, w)
@buf.push [[lineno(), column()], __callee__, tok] @buf.each do |e|
if e.event == :on_tstring_content
if (n = dedent_string(e.tok, w)) > 0
e.pos[1] += n
end
end
end
v
end end
SCANNER_EVENTS.each do |event| def on_heredoc_beg(tok)
alias_method "on_#{event}", :_push_token @stack.push @buf
buf = []
@buf << buf
@buf = buf
@buf.push Elem.new([lineno(), column()], __callee__, tok)
end
# Records the heredoc terminator token in the current buffer, then makes the
# buffer most recently saved on @stack the current one again.
def on_heredoc_end(tok)
  position = [lineno, column]
  @buf << Elem.new(position, __callee__, tok)
  @buf = @stack.pop
end
# Appends one token to the current buffer as an Elem holding the current
# [line, column], the name this method was invoked under, and the token text.
def _push_token(tok)
  position = [lineno, column]
  @buf << Elem.new(position, __callee__, tok)
end
(SCANNER_EVENTS.map {|event|:"on_#{event}"} - private_instance_methods(false)).each do |event|
alias_method event, :_push_token
end end
end end

View File

@ -62,7 +62,35 @@ class Ripper
class SexpBuilder < ::Ripper #:nodoc: class SexpBuilder < ::Ripper #:nodoc:
private private
PARSER_EVENTS.each do |event| def dedent_element(e, width)
if (n = dedent_string(e[1], width)) > 0
e[2][1] += n
end
e
end
# Handler for the heredoc_dedent event: walks +val+ in place and removes up to
# +width+ columns of leading indentation from the string content it visits.
#
# For each element of the visited array:
# * an array tagged :@tstring_content is passed to dedent_element;
# * an array whose tag ends in "_add" has its second slot walked recursively;
# * a bare String is dedented in place with dedent_string;
# * anything else is left as it is.
#
# Returns +val+ itself.
def on_heredoc_dedent(val, width)
  walker = lambda do |nodes|
    nodes.map! do |item|
      case item
      when Array
        case item[0]
        when :@tstring_content
          item = dedent_element(item, width)
        when /_add\z/
          item[1] = walker.call(item[1])
        end
      when String
        dedent_string(item, width)
      end
      item
    end
  end
  walker.call(val)
  val
end
events = private_instance_methods(false).grep(/\Aon_/) {$'.to_sym}
(PARSER_EVENTS - events).each do |event|
module_eval(<<-End, __FILE__, __LINE__ + 1) module_eval(<<-End, __FILE__, __LINE__ + 1)
def on_#{event}(*args) def on_#{event}(*args)
args.unshift :#{event} args.unshift :#{event}
@ -83,6 +111,19 @@ class Ripper
class SexpBuilderPP < SexpBuilder #:nodoc: class SexpBuilderPP < SexpBuilder #:nodoc:
private private
# Handler for the heredoc_dedent event on the flat content list: removes up to
# +width+ columns of leading indentation from each direct element of +val+.
#
# Symbols pass through untouched, arrays tagged :@tstring_content go through
# dedent_element, bare Strings are dedented in place with dedent_string, and
# every other element is kept as it is. Nested arrays are not descended into.
#
# Returns +val+ itself.
def on_heredoc_dedent(val, width)
  val.map! do |item|
    case item
    when Symbol
      item
    when Array
      item[0] == :@tstring_content ? dedent_element(item, width) : item
    when String
      dedent_string(item, width)
      item
    else
      item
    end
  end
  val
end
def _dispatch_event_new def _dispatch_event_new
[] []
end end

187
parse.y
View File

@ -257,6 +257,8 @@ struct parser_params {
int toksiz; int toksiz;
int tokline; int tokline;
int heredoc_end; int heredoc_end;
int heredoc_indent;
int heredoc_line_indent;
char *tokenbuf; char *tokenbuf;
NODE *deferred_nodes; NODE *deferred_nodes;
struct local_vars *lvtbl; struct local_vars *lvtbl;
@ -347,6 +349,8 @@ static int parser_yyerror(struct parser_params*, const char*);
#define lex_p (parser->lex.pcur) #define lex_p (parser->lex.pcur)
#define lex_pend (parser->lex.pend) #define lex_pend (parser->lex.pend)
#define heredoc_end (parser->heredoc_end) #define heredoc_end (parser->heredoc_end)
#define heredoc_indent (parser->heredoc_indent)
#define heredoc_line_indent (parser->heredoc_line_indent)
#define command_start (parser->command_start) #define command_start (parser->command_start)
#define deferred_nodes (parser->deferred_nodes) #define deferred_nodes (parser->deferred_nodes)
#define lex_gets_ptr (parser->lex.gets_ptr) #define lex_gets_ptr (parser->lex.gets_ptr)
@ -487,6 +491,9 @@ static int reg_fragment_check_gen(struct parser_params*, VALUE, int);
static NODE *reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match); static NODE *reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match);
#define reg_named_capture_assign(regexp,match) reg_named_capture_assign_gen(parser,(regexp),(match)) #define reg_named_capture_assign(regexp,match) reg_named_capture_assign_gen(parser,(regexp),(match))
static void parser_heredoc_dedent(struct parser_params*,NODE*);
# define heredoc_dedent(str) parser_heredoc_dedent(parser, (str))
#define get_id(id) (id) #define get_id(id) (id)
#define get_value(val) (val) #define get_value(val) (val)
#else #else
@ -670,6 +677,9 @@ new_args_tail_gen(struct parser_params *parser, VALUE k, VALUE kr, VALUE b)
#define new_defined(expr) dispatch1(defined, (expr)) #define new_defined(expr) dispatch1(defined, (expr))
static void parser_heredoc_dedent(struct parser_params*,VALUE);
# define heredoc_dedent(str) parser_heredoc_dedent(parser, (str))
#define FIXME 0 #define FIXME 0
#endif /* RIPPER */ #endif /* RIPPER */
@ -3887,6 +3897,7 @@ strings : string
else { else {
node = evstr2dstr(node); node = evstr2dstr(node);
} }
heredoc_indent = 0;
$$ = node; $$ = node;
/*% /*%
$$ = $1; $$ = $1;
@ -3908,6 +3919,7 @@ string : tCHAR
string1 : tSTRING_BEG string_contents tSTRING_END string1 : tSTRING_BEG string_contents tSTRING_END
{ {
heredoc_dedent($2);
/*%%%*/ /*%%%*/
$$ = $2; $$ = $2;
/*% /*%
@ -3920,6 +3932,10 @@ xstring : tXSTRING_BEG xstring_contents tSTRING_END
{ {
/*%%%*/ /*%%%*/
NODE *node = $2; NODE *node = $2;
/*%
%*/
heredoc_dedent($2);
/*%%%*/
if (!node) { if (!node) {
node = NEW_XSTR(STR_NEW0()); node = NEW_XSTR(STR_NEW0());
} }
@ -4319,6 +4335,10 @@ string_content : tSTRING_CONTENT
$<num>$ = brace_nest; $<num>$ = brace_nest;
brace_nest = 0; brace_nest = 0;
} }
{
$<num>$ = heredoc_indent;
heredoc_indent = 0;
}
compstmt tSTRING_DEND compstmt tSTRING_DEND
{ {
cond_stack = $<val>1; cond_stack = $<val>1;
@ -4326,11 +4346,13 @@ string_content : tSTRING_CONTENT
lex_strterm = $<node>3; lex_strterm = $<node>3;
lex_state = $<num>4; lex_state = $<num>4;
brace_nest = $<num>5; brace_nest = $<num>5;
heredoc_indent = $<num>6;
heredoc_line_indent = -1;
/*%%%*/ /*%%%*/
if ($6) $6->flags &= ~NODE_FL_NEWLINE; if ($7) $7->flags &= ~NODE_FL_NEWLINE;
$$ = new_evstr($6); $$ = new_evstr($7);
/*% /*%
$$ = dispatch1(string_embexpr, $6); $$ = dispatch1(string_embexpr, $7);
%*/ %*/
} }
; ;
@ -6204,6 +6226,27 @@ parser_tokadd_string(struct parser_params *parser,
} while (0) } while (0)
while ((c = nextc()) != -1) { while ((c = nextc()) != -1) {
if (heredoc_indent > 0) {
if (heredoc_line_indent == -1) {
if (c == '\n') heredoc_line_indent = 0;
}
else {
if (c == ' ') {
heredoc_line_indent++;
}
else if (c == '\t') {
int w = (heredoc_line_indent / TAB_WIDTH) + 1;
heredoc_line_indent = w * TAB_WIDTH;
}
else if (c != '\n') {
if (heredoc_indent > heredoc_line_indent) {
heredoc_indent = heredoc_line_indent;
}
heredoc_line_indent = -1;
}
}
}
if (paren && c == paren) { if (paren && c == paren) {
++*nest; ++*nest;
} }
@ -6465,6 +6508,12 @@ parser_heredoc_identifier(struct parser_params *parser)
c = nextc(); c = nextc();
func = STR_FUNC_INDENT; func = STR_FUNC_INDENT;
} }
else if (c == '~') {
c = nextc();
func = STR_FUNC_INDENT;
heredoc_indent = INT_MAX;
heredoc_line_indent = 0;
}
switch (c) { switch (c) {
case '\'': case '\'':
func |= str_squote; goto quoted; func |= str_squote; goto quoted;
@ -6489,7 +6538,7 @@ parser_heredoc_identifier(struct parser_params *parser)
if (!parser_is_identchar()) { if (!parser_is_identchar()) {
pushback(c); pushback(c);
if (func & STR_FUNC_INDENT) { if (func & STR_FUNC_INDENT) {
pushback('-'); pushback(heredoc_indent > 0 ? '~' : '-');
} }
return 0; return 0;
} }
@ -6534,6 +6583,114 @@ parser_heredoc_restore(struct parser_params *parser, NODE *here)
ripper_flush(parser); ripper_flush(parser);
} }
/*
 * Returns how many leading bytes of str (at most len) make up indentation
 * that fits within `width` columns.
 *
 * A space advances the column by one; a tab advances it to the next multiple
 * of TAB_WIDTH.  Scanning stops at the first byte that is neither, once the
 * column reaches `width`, or at a tab that would overshoot `width`.
 */
static int
dedent_pos(const char *str, long len, int width)
{
    int pos = 0, column = 0;

    while (pos < len && column < width) {
        const char ch = str[pos];

        if (ch == '\t') {
            int next_stop = TAB_WIDTH * (column / TAB_WIDTH + 1);
            if (next_stop > width) break;   /* tab would exceed the width */
            column = next_stop;
        }
        else if (ch == ' ') {
            column++;
        }
        else {
            break;                          /* first non-indentation byte */
        }
        pos++;
    }
    return pos;
}
#ifndef RIPPER
/*
 * Removes up to `width` columns of leading indentation from the lines of
 * `input`, compacting the string buffer in place, and returns `input`.
 *
 * When `first` is zero, everything up to and including the first newline is
 * left untouched (and the string is returned unchanged if it has no newline).
 * NOTE(review): presumably because such a fragment continues a line that was
 * started by an earlier fragment -- confirm against the caller.
 */
static VALUE
parser_heredoc_dedent_string(VALUE input, int width, int first)
{
long len;
int col;
char *str, *p, *out_p, *end, *t;
RSTRING_GETMEM(input, str, len);
end = &str[len];
p = str;
if (!first) {
/* skip the remainder of the current line; dedent starts on the next one */
p = memchr(p, '\n', end - p);
if (!p) return input;
p++;
}
/* p is the read cursor, out_p the write cursor; out_p never passes p */
out_p = p;
while (p < end) {
/* col is the number of indentation bytes to drop from this line */
col = dedent_pos(p, end - p, width);
p += col;
/* t points just past this line's newline, or at end for the last line */
if (!(t = memchr(p, '\n', end - p)))
t = end;
else
++t;
/* shift the rest of the line down only once something has been removed */
if (p > out_p) memmove(out_p, p, t - p);
out_p += t - p;
p = t;
}
/* shrink the string to the compacted length */
rb_str_set_len(input, out_p - str);
return input;
}
/*
 * Applies the indentation width recorded in heredoc_indent to every string
 * fragment of a heredoc body.  Does nothing when heredoc_indent is not
 * positive.
 *
 * `root` is the first string node; further fragments are found by following
 * nd_next through NODE_ARRAY links and taking each nd_head that is a
 * NODE_STR or NODE_DSTR.  Only the first fragment is dedented from its very
 * first byte (first == TRUE); later ones start after their first newline.
 */
static void
parser_heredoc_dedent(struct parser_params *parser, NODE *root)
{
    NODE *node, *str_node;
    int first = TRUE;
    int indent = heredoc_indent;

    if (indent <= 0) return;
    node = str_node = root;
    while (str_node) {
        VALUE lit = str_node->nd_lit;
        /* defensive: the helper as written always returns its input */
        if (NIL_P(parser_heredoc_dedent_string(lit, indent, first)))
            compile_error(PARSER_ARG "dedent failure: %d: %"PRIsVALUE, indent, lit);
        first = FALSE;
        str_node = 0;
        while ((node = node->nd_next) != 0 && nd_type(node) == NODE_ARRAY) {
            if ((str_node = node->nd_head) != 0) {
                enum node_type type = nd_type(str_node);
                if (type == NODE_STR || type == NODE_DSTR) break;
                /*
                 * Not a string fragment (e.g. an interpolation): forget it,
                 * so that running off the end of the list here terminates the
                 * outer loop instead of treating this node as a literal and
                 * then following nd_next from a NULL node.
                 */
                str_node = 0;
            }
        }
    }
}
#else /* RIPPER */
/*
 * Ripper variant: when a positive heredoc_indent has been recorded, dispatch
 * the heredoc_dedent event with the collected contents and that width.
 */
static void
parser_heredoc_dedent(struct parser_params *parser, VALUE array)
{
    if (heredoc_indent > 0) {
        dispatch2(heredoc_dedent, array, INT2NUM(heredoc_indent));
    }
}
/*
 * Implementation registered as "dedent_string" on Ripper (both as a singleton
 * method and as a private method).
 *
 * Strips leading indentation of at most `width` columns from the start of
 * `input`, modifying it in place, and returns the number of bytes removed as
 * an Integer.  Only the beginning of the string is examined; later lines are
 * not touched.
 */
static VALUE
parser_dedent_string(VALUE self, VALUE input, VALUE width)
{
char *str;
long len;
int wid, col;
/* convert the arguments first; either conversion may raise */
StringValue(input);
wid = NUM2UINT(width);
/* the buffer is edited in place below, so make it writable beforehand */
rb_str_modify(input);
RSTRING_GETMEM(input, str, len);
/* despite its name, col is the count of leading bytes to drop */
col = dedent_pos(str, len, wid);
MEMMOVE(str, str + col, char, len - col);
rb_str_set_len(input, len - col);
return INT2NUM(col);
}
#endif
static int static int
parser_whole_match_p(struct parser_params *parser, parser_whole_match_p(struct parser_params *parser,
const char *eos, long len, int indent) const char *eos, long len, int indent)
@ -6685,7 +6842,15 @@ parser_here_document(struct parser_params *parser, NODE *here)
} }
if (!(func & STR_FUNC_EXPAND)) { if (!(func & STR_FUNC_EXPAND)) {
int end = 0;
do { do {
#ifdef RIPPER
if (end && heredoc_indent > 0) {
set_yylval_str(str);
flush_string_content(enc);
return tSTRING_CONTENT;
}
#endif
p = RSTRING_PTR(lex_lastline); p = RSTRING_PTR(lex_lastline);
pend = lex_pend; pend = lex_pend;
if (pend > p) { if (pend > p) {
@ -6712,7 +6877,7 @@ parser_here_document(struct parser_params *parser, NODE *here)
} }
goto error; goto error;
} }
} while (!whole_match_p(eos, len, indent)); } while (!(end = whole_match_p(eos, len, indent)));
} }
else { else {
/* int mb = ENC_CODERANGE_7BIT, *mbp = &mb;*/ /* int mb = ENC_CODERANGE_7BIT, *mbp = &mb;*/
@ -6730,11 +6895,20 @@ parser_here_document(struct parser_params *parser, NODE *here)
goto restore; goto restore;
} }
if (c != '\n') { if (c != '\n') {
#ifdef RIPPER
flush:
#endif
set_yylval_str(STR_NEW3(tok(), toklen(), enc, func)); set_yylval_str(STR_NEW3(tok(), toklen(), enc, func));
flush_string_content(enc); flush_string_content(enc);
return tSTRING_CONTENT; return tSTRING_CONTENT;
} }
tokadd(nextc()); tokadd(nextc());
#ifdef RIPPER
if (c == '\n' && heredoc_indent > 0) {
lex_goto_eol(parser);
goto flush;
}
#endif
/* if (mbp && mb == ENC_CODERANGE_UNKNOWN) mbp = 0;*/ /* if (mbp && mb == ENC_CODERANGE_UNKNOWN) mbp = 0;*/
if ((c = nextc()) == -1) goto error; if ((c = nextc()) == -1) goto error;
} while (!whole_match_p(eos, len, indent)); } while (!whole_match_p(eos, len, indent));
@ -11294,6 +11468,9 @@ InitVM_ripper(void)
rb_define_method(rb_mKernel, "validate_object", ripper_validate_object, 1); rb_define_method(rb_mKernel, "validate_object", ripper_validate_object, 1);
#endif #endif
rb_define_singleton_method(Ripper, "dedent_string", parser_dedent_string, 2);
rb_define_private_method(Ripper, "dedent_string", parser_dedent_string, 2);
ripper_init_eventids1_table(Ripper); ripper_init_eventids1_table(Ripper);
ripper_init_eventids2_table(Ripper); ripper_init_eventids2_table(Ripper);

View File

@ -431,6 +431,19 @@ class TestRipper::ParserEvents < Test::Unit::TestCase
assert_equal("heredoc1\nheredoc2\n", heredoc, bug1921) assert_equal("heredoc1\nheredoc2\n", heredoc, bug1921)
end end
# Parses a one-line "<<~" heredoc and checks that the on_heredoc_dedent event
# fires, that the resulting tree matches the expected content, and that the
# width passed to the event is 1.
def test_heredoc_dedent
  fired = false
  seen_str = seen_width = nil
  source = "<""<~EOS\n heredoc\nEOS\n"
  tree = parse(source, :on_heredoc_dedent) do |e, s, w|
    fired = true
    seen_str = s
    seen_width = w
  end
  assert_equal true, fired
  assert_match(/string_content\(\), heredoc\n/, tree)
  assert_equal(1, seen_width)
end
def test_massign def test_massign
thru_massign = false thru_massign = false
parse("a, b = 1, 2", :on_massign) {thru_massign = true} parse("a, b = 1, 2", :on_massign) {thru_massign = true}

View File

@ -38,6 +38,27 @@ class TestRipper::Sexp < Test::Unit::TestCase
assert_equal "foo\n", search_sexp(:@tstring_content, sexp)[1] assert_equal "foo\n", search_sexp(:@tstring_content, sexp)[1]
end end
# The first :@tstring_content found in the sexp of a "<<~" heredoc must carry
# the content without its leading indentation.
def test_squiggly_heredoc
  source = "<<~eot\n asdf\neot"
  content = search_sexp(:@tstring_content, Ripper.sexp(source))
  assert_equal "asdf\n", content[1]
end
# Builds one sexp from a "<<-" heredoc and one from a "<<~" heredoc containing
# an interpolated expression, and asserts that both trees are equal once
# clear_pos has been applied to them.
# NOTE(review): leading whitespace inside the heredoc bodies appears to have
# been collapsed in this view; confirm the indentation against the original file.
def test_squiggly_heredoc_with_interpolated_expression
sexp1 = Ripper.sexp(<<-eos)
<<-eot
a\#{1}z
eot
eos
sexp2 = Ripper.sexp(<<-eos)
<<~eot
a\#{1}z
eot
eos
# positions legitimately differ between the two sources, so blank them first
assert_equal clear_pos(sexp1), clear_pos(sexp2)
end
def search_sexp(sym, sexp) def search_sexp(sym, sexp)
return sexp if !sexp or sexp[0] == sym return sexp if !sexp or sexp[0] == sym
sexp.find do |e| sexp.find do |e|
@ -46,4 +67,18 @@ class TestRipper::Sexp < Test::Unit::TestCase
end end
end end
end end
# Recursively empties the position pairs in +sexp+ so that trees can be
# compared regardless of source positions.
#
# A three-element array whose last element is a two-Integer array is treated
# as a positioned token: that pair is cleared in place and the token is not
# descended into. Every other nested array is walked recursively.
#
# Returns +sexp+ itself (nil/false are returned unchanged).
def clear_pos(sexp)
  return sexp unless sexp
  sexp.each do |node|
    next unless Array === node
    tail = node.last
    positioned = node.size == 3 && Array === tail && tail.size == 2 &&
                 Integer === tail[0] && Integer === tail[1]
    if positioned
      tail.clear
    else
      clear_pos(node)
    end
  end
end
end if ripper_test end if ripper_test

View File

@ -475,6 +475,94 @@ e"
assert_equal(expected, actual, "#{Bug7559}: ") assert_equal(expected, actual, "#{Bug7559}: ")
end end
# Compares the value of a two-line "<<~" heredoc with the expected string.
# NOTE(review): leading whitespace in the literals and heredoc body appears to
# have been collapsed in this view; confirm against the original file.
def test_dedented_heredoc_without_indentation
assert_equal(" y\nz\n", <<~eos)
y
z
eos
end
# Compares the value of a two-line "<<~" heredoc with the expected string.
# NOTE(review): leading whitespace in the literals and heredoc body appears to
# have been collapsed in this view; confirm against the original file.
def test_dedented_heredoc_with_indentation
assert_equal(" a\nb\n", <<~eos)
a
b
eos
end
# Evaluates a "<<~" heredoc containing a whitespace-only line between two
# content lines and expects that line to come out empty in the result.
# NOTE(review): leading whitespace in the string literals appears to have been
# collapsed in this view; confirm against the original file.
def test_dedented_heredoc_with_blank_less_indented_line
# the blank line has two leading spaces
result = eval("<<~eos\n" \
" a\n" \
" \n" \
" b\n" \
" eos\n")
assert_equal("a\n\nb\n", result)
end
# Same shape as the whitespace-only-line case, but the middle line is written
# with backslash-escaped spaces; the expected result keeps leading spaces.
# NOTE(review): leading whitespace in the string literals appears to have been
# collapsed in this view; confirm against the original file.
def test_dedented_heredoc_with_blank_less_indented_line_escaped
result = eval("<<~eos\n" \
" a\n" \
"\\ \\ \n" \
" b\n" \
" eos\n")
assert_equal(" a\n \n b\n", result)
end
# Evaluates a "<<~" heredoc whose whitespace-only middle line is described as
# more deeply indented than the content lines, and checks the result.
# NOTE(review): leading whitespace in the string literals appears to have been
# collapsed in this view; confirm against the original file.
def test_dedented_heredoc_with_blank_more_indented_line
# the blank line has six leading spaces
result = eval("<<~eos\n" \
" a\n" \
" \n" \
" b\n" \
" eos\n")
assert_equal("a\n \nb\n", result)
end
# Variant of the more-indented whitespace-only-line case in which the middle
# line consists of backslash-escaped spaces.
# NOTE(review): leading whitespace in the string literals appears to have been
# collapsed in this view; confirm against the original file.
def test_dedented_heredoc_with_blank_more_indented_line_escaped
result = eval("<<~eos\n" \
" a\n" \
"\\ \\ \\ \\ \\ \\ \n" \
" b\n" \
" eos\n")
assert_equal(" a\n \n b\n", result)
end
# Evaluates a "<<~" heredoc with a completely empty line between two content
# lines and compares the result with a "<<-" heredoc.
# NOTE(review): leading whitespace and blank lines in the literals and heredoc
# body appear to have been collapsed in this view; confirm against the
# original file.
def test_dedented_heredoc_with_empty_line
result = eval("<<~eos\n" \
" This would contain specially formatted text.\n" \
"\n" \
" That might span many lines\n" \
" eos\n")
assert_equal(<<-eos, result)
This would contain specially formatted text.
That might span many lines
eos
end
# Evaluates a "<<~" heredoc whose first line starts with an interpolated
# expression and compares the result with a "<<-" heredoc.
# NOTE(review): leading whitespace in the literals and heredoc body appears to
# have been collapsed in this view; confirm against the original file.
def test_dedented_heredoc_with_interpolated_expression
result = eval(" <<~eos\n" \
" #{1}a\n" \
" zy\n" \
" eos\n")
assert_equal(<<-eos, result)
#{1}a
zy
eos
end
# Evaluates a "<<~" heredoc whose first line starts with an interpolation of
# the empty string w and compares the result with a "<<-" heredoc.
# NOTE(review): leading whitespace in the literals and heredoc body appears to
# have been collapsed in this view; confirm against the original file.
def test_dedented_heredoc_with_interpolated_string
w = ""
result = eval("<<~eos\n" \
" \#{w} a\n" \
" zy\n" \
" eos\n")
assert_equal(<<-eos, result)
#{w} a
zy
eos
end
def test_lineno_after_heredoc def test_lineno_after_heredoc
bug7559 = '[ruby-dev:46737]' bug7559 = '[ruby-dev:46737]'
expected, _, actual = __LINE__, <<eom, __LINE__ expected, _, actual = __LINE__, <<eom, __LINE__