parse.y: indented heredoc

* parse.y: add heredoc <<~ syntax.  [Feature #9098]

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@52916 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
nobu 2015-12-07 14:39:52 +00:00
parent 9f51e95fc1
commit 9a28a29b87
8 changed files with 412 additions and 12 deletions

View File

@ -1,3 +1,7 @@
Mon Dec 7 23:39:49 2015 Ben Miller <bjmllr@gmail.com>
* parse.y: add heredoc <<~ syntax. [Feature #9098]
Mon Dec 7 23:06:16 2015 Kazuhiro NISHIYAMA <zn@mbf.nifty.com>
* prelude.rb (IO#read_nonblock): [DOC] add missing options to

View File

@ -196,6 +196,20 @@ Note that while the closing identifier may be indented, the content is
always treated as if it is flush left. If you indent the content those spaces
will appear in the output.
To have indented content as well as an indented closing identifier, you can use
a "squiggly" heredoc, which uses a "~" instead of a "-" after <tt><<</tt>:
expected_result = <<~SQUIGGLY_HEREDOC
This would contain specially formatted text.
That might span many lines
SQUIGGLY_HEREDOC
The indentation of the least-indented line will be removed from each line of
the content. Note that empty lines and lines consisting solely of literal tabs
and spaces will be ignored for the purposes of determining indentation, but
escaped tabs and spaces are considered non-indentation characters.
A heredoc allows interpolation and escaped characters. You may disable
interpolation and escaping by surrounding the opening identifier with single
quotes:

View File

@ -44,28 +44,56 @@ class Ripper
end
class Lexer < ::Ripper #:nodoc: internal use only
Elem = Struct.new(:pos, :event, :tok)
# Returns the source's token strings ordered by position.
#
# Each element produced by #parse is an Elem (pos, event, tok); the tokens
# are sorted by their [line, column] position and only the text is kept.
# The source is parsed exactly once: an earlier leftover statement went
# through #lex first and threw the result away, costing a second parse.
def tokenize
  parse.sort_by(&:pos).map(&:tok)
end
# Returns the scanned tokens as [[line, column], event, token] triples,
# ordered by position.
#
# #parse yields Elem structs; they are sorted by position and converted to
# plain arrays. The source is parsed exactly once: an earlier leftover
# statement parsed and sorted a first time and discarded the result.
def lex
  parse.sort_by(&:pos).map(&:to_a)
end
# Runs the inherited parser with fresh token storage and returns every
# collected token in one flat list.
#
# @stack holds the buffers suspended while a heredoc is being scanned and
# @buf is the buffer currently receiving tokens; heredoc buffers are nested
# inside @buf, hence the in-place flatten once parsing is done.
def parse
  @stack = []
  @buf = []
  super
  @buf.tap(&:flatten!)
end
private
def _push_token(tok)
@buf.push [[lineno(), column()], __callee__, tok]
# Handles the heredoc_dedent event of a squiggly heredoc: removes up to +w+
# columns of leading indentation from the heredoc's string-content tokens
# and shifts each token's recorded column by the amount removed.
#
# v:: the parser value for the heredoc body; returned untouched.
# w:: the indentation width to strip.
#
# Heredoc tokens live in their own buffer, nested as the last element of
# the current buffer (see on_heredoc_beg / on_heredoc_end). Only that
# nested buffer is walked: iterating the outer buffer would call Elem
# methods on the nested Array and would also touch string tokens that do
# not belong to this heredoc. Entries that are not Elem (buffers of inner
# heredocs) are skipped.
def on_heredoc_dedent(v, w)
  heredoc = @buf.last
  return v unless Array === heredoc

  heredoc.each do |e|
    next unless Elem === e && e.event == :on_tstring_content

    n = dedent_string(e.tok, w)
    e.pos[1] += n if n > 0
  end
  v
end
SCANNER_EVENTS.each do |event|
alias_method "on_#{event}", :_push_token
# Scanner hook for the opening of a heredoc.
#
# Suspends the current buffer on @stack, starts a new buffer nested inside
# it, and records the opening token as the first element of that new
# buffer. Returns the new buffer.
def on_heredoc_beg(tok)
  outer = @buf
  @stack << outer
  @buf = []
  outer << @buf
  @buf << Elem.new([lineno, column], __callee__, tok)
end
# Scanner hook for the terminator of a heredoc.
#
# Records the closing token in the heredoc's own buffer, then resumes the
# buffer that was suspended on @stack when the heredoc began. Returns the
# resumed buffer.
def on_heredoc_end(tok)
  closing = Elem.new([lineno, column], __callee__, tok)
  @buf << closing
  @buf = @stack.pop
end
# Generic scanner hook: appends the token to the current buffer as an Elem
# carrying its [line, column] position and the event name. The event name
# comes from __callee__, so it reflects the name this method was invoked
# under when called through an alias. Returns the buffer.
def _push_token(tok)
  position = [lineno, column]
  @buf << Elem.new(position, __callee__, tok)
end
# Route every scanner event that has no dedicated handler defined in this
# class through the generic _push_token hook.
handler_names = SCANNER_EVENTS.map { |name| :"on_#{name}" }
(handler_names - private_instance_methods(false)).each do |handler|
  alias_method handler, :_push_token
end
end

View File

@ -62,7 +62,35 @@ class Ripper
class SexpBuilder < ::Ripper #:nodoc:
private
PARSER_EVENTS.each do |event|
# Dedents one token element in place and returns it.
#
# e::     element whose text is at index 1 and whose [line, column]
#         position is at index 2.
# width:: number of indentation columns to strip.
#
# The column is advanced by however much dedent_string reports it removed.
def dedent_element(e, width)
  removed = dedent_string(e[1], width)
  e[2][1] += removed if removed > 0
  e
end
# Handles the heredoc_dedent event: strips up to +width+ columns of
# indentation from the string pieces found in +val+, mutating +val+ in
# place, and returns +val+.
#
# For each element of the list being walked:
# * a :@tstring_content element is dedented through dedent_element;
# * an element whose tag ends in "_add" has its index-1 child walked
#   recursively;
# * a bare String is dedented in place;
# * anything else is left as is.
#
# NOTE(review): for a nested "*_add" element only index 1 is descended
# into; its index-2 child is not visited here -- confirm that is intended.
def on_heredoc_dedent(val, width)
  strip = proc do |nodes|
    nodes.map! do |node|
      case node
      when Array
        tag = node[0]
        if :@tstring_content == tag
          node = dedent_element(node, width)
        elsif /_add\z/ === tag
          node[1] = strip[node[1]]
        end
      when String
        dedent_string(node, width)
      end
      node
    end
  end
  strip.call(val)
  val
end
events = private_instance_methods(false).grep(/\Aon_/) {$'.to_sym}
(PARSER_EVENTS - events).each do |event|
module_eval(<<-End, __FILE__, __LINE__ + 1)
def on_#{event}(*args)
args.unshift :#{event}
@ -83,6 +111,19 @@ class Ripper
class SexpBuilderPP < SexpBuilder #:nodoc:
private
# Handles the heredoc_dedent event for the flat list form of +val+: strips
# up to +width+ columns of indentation from each top-level string piece,
# mutating +val+ in place, and returns +val+.
#
# Symbols (such as the leading tag) pass through unchanged,
# :@tstring_content elements go through dedent_element, bare Strings are
# dedented in place, and every other element is kept as is.
def on_heredoc_dedent(val, width)
  val.map! do |item|
    case item
    when Symbol
      item
    when Array
      item[0] == :@tstring_content ? dedent_element(item, width) : item
    when String
      dedent_string(item, width)
      item
    else
      item
    end
  end
  val
end
# Returns a new, empty Array on every call.
# NOTE(review): presumably the starting value for events that build up a
# list -- confirm against the callers, which are outside this excerpt.
def _dispatch_event_new
[]
end

187
parse.y
View File

@ -257,6 +257,8 @@ struct parser_params {
int toksiz;
int tokline;
int heredoc_end;
int heredoc_indent;
int heredoc_line_indent;
char *tokenbuf;
NODE *deferred_nodes;
struct local_vars *lvtbl;
@ -347,6 +349,8 @@ static int parser_yyerror(struct parser_params*, const char*);
#define lex_p (parser->lex.pcur)
#define lex_pend (parser->lex.pend)
#define heredoc_end (parser->heredoc_end)
#define heredoc_indent (parser->heredoc_indent)
#define heredoc_line_indent (parser->heredoc_line_indent)
#define command_start (parser->command_start)
#define deferred_nodes (parser->deferred_nodes)
#define lex_gets_ptr (parser->lex.gets_ptr)
@ -487,6 +491,9 @@ static int reg_fragment_check_gen(struct parser_params*, VALUE, int);
static NODE *reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match);
#define reg_named_capture_assign(regexp,match) reg_named_capture_assign_gen(parser,(regexp),(match))
static void parser_heredoc_dedent(struct parser_params*,NODE*);
# define heredoc_dedent(str) parser_heredoc_dedent(parser, (str))
#define get_id(id) (id)
#define get_value(val) (val)
#else
@ -670,6 +677,9 @@ new_args_tail_gen(struct parser_params *parser, VALUE k, VALUE kr, VALUE b)
#define new_defined(expr) dispatch1(defined, (expr))
static void parser_heredoc_dedent(struct parser_params*,VALUE);
# define heredoc_dedent(str) parser_heredoc_dedent(parser, (str))
#define FIXME 0
#endif /* RIPPER */
@ -3887,6 +3897,7 @@ strings : string
else {
node = evstr2dstr(node);
}
heredoc_indent = 0;
$$ = node;
/*%
$$ = $1;
@ -3908,6 +3919,7 @@ string : tCHAR
string1 : tSTRING_BEG string_contents tSTRING_END
{
heredoc_dedent($2);
/*%%%*/
$$ = $2;
/*%
@ -3920,6 +3932,10 @@ xstring : tXSTRING_BEG xstring_contents tSTRING_END
{
/*%%%*/
NODE *node = $2;
/*%
%*/
heredoc_dedent($2);
/*%%%*/
if (!node) {
node = NEW_XSTR(STR_NEW0());
}
@ -4319,6 +4335,10 @@ string_content : tSTRING_CONTENT
$<num>$ = brace_nest;
brace_nest = 0;
}
{
$<num>$ = heredoc_indent;
heredoc_indent = 0;
}
compstmt tSTRING_DEND
{
cond_stack = $<val>1;
@ -4326,11 +4346,13 @@ string_content : tSTRING_CONTENT
lex_strterm = $<node>3;
lex_state = $<num>4;
brace_nest = $<num>5;
heredoc_indent = $<num>6;
heredoc_line_indent = -1;
/*%%%*/
if ($6) $6->flags &= ~NODE_FL_NEWLINE;
$$ = new_evstr($6);
if ($7) $7->flags &= ~NODE_FL_NEWLINE;
$$ = new_evstr($7);
/*%
$$ = dispatch1(string_embexpr, $6);
$$ = dispatch1(string_embexpr, $7);
%*/
}
;
@ -6204,6 +6226,27 @@ parser_tokadd_string(struct parser_params *parser,
} while (0)
while ((c = nextc()) != -1) {
if (heredoc_indent > 0) {
if (heredoc_line_indent == -1) {
if (c == '\n') heredoc_line_indent = 0;
}
else {
if (c == ' ') {
heredoc_line_indent++;
}
else if (c == '\t') {
int w = (heredoc_line_indent / TAB_WIDTH) + 1;
heredoc_line_indent = w * TAB_WIDTH;
}
else if (c != '\n') {
if (heredoc_indent > heredoc_line_indent) {
heredoc_indent = heredoc_line_indent;
}
heredoc_line_indent = -1;
}
}
}
if (paren && c == paren) {
++*nest;
}
@ -6465,6 +6508,12 @@ parser_heredoc_identifier(struct parser_params *parser)
c = nextc();
func = STR_FUNC_INDENT;
}
else if (c == '~') {
c = nextc();
func = STR_FUNC_INDENT;
heredoc_indent = INT_MAX;
heredoc_line_indent = 0;
}
switch (c) {
case '\'':
func |= str_squote; goto quoted;
@ -6489,7 +6538,7 @@ parser_heredoc_identifier(struct parser_params *parser)
if (!parser_is_identchar()) {
pushback(c);
if (func & STR_FUNC_INDENT) {
pushback('-');
pushback(heredoc_indent > 0 ? '~' : '-');
}
return 0;
}
@ -6534,6 +6583,114 @@ parser_heredoc_restore(struct parser_params *parser, NODE *here)
ripper_flush(parser);
}
/*
 * Returns how many leading bytes of str (at most len) form indentation
 * that fits within `width` columns.
 *
 * A space occupies one column; a tab advances to the next multiple of
 * TAB_WIDTH. Scanning stops at the first byte that is neither, once
 * `width` columns have been covered, or at a tab that would overshoot
 * `width` (that tab is not counted).
 */
static int
dedent_pos(const char *str, long len, int width)
{
    int pos = 0, col = 0;

    while (pos < len && col < width) {
	const char ch = str[pos];

	if (ch == ' ') {
	    col++;
	}
	else if (ch == '\t') {
	    int next_stop = TAB_WIDTH * (col / TAB_WIDTH + 1);
	    if (next_stop > width) break;
	    col = next_stop;
	}
	else {
	    break;
	}
	pos++;
    }
    return pos;
}
#ifndef RIPPER
/*
 * Removes up to `width` columns of leading indentation from the lines of
 * `input`, editing the string's buffer in place, and returns `input`.
 *
 * When `first` is zero, everything up to and including the first newline is
 * left untouched and dedenting starts on the following line; if there is no
 * newline at all the string is returned unchanged.
 * NOTE(review): presumably because such a fragment starts in the middle of a
 * line (after an interpolation) -- confirm against the caller.
 */
static VALUE
parser_heredoc_dedent_string(VALUE input, int width, int first)
{
long len;
int col;
char *str, *p, *out_p, *end, *t;
RSTRING_GETMEM(input, str, len);
end = &str[len];
p = str;
if (!first) {
/* skip the remainder of the line this fragment starts in */
p = memchr(p, '\n', end - p);
if (!p) return input;
p++;
}
/* p reads the original text, out_p writes the compacted text */
out_p = p;
while (p < end) {
/* step over the indentation to be dropped from this line */
col = dedent_pos(p, end - p, width);
p += col;
/* t = one past this line's newline, or the end of the buffer */
if (!(t = memchr(p, '\n', end - p)))
t = end;
else
++t;
/* shift the line left only once something has actually been removed */
if (p > out_p) memmove(out_p, p, t - p);
out_p += t - p;
p = t;
}
/* shrink the string to the compacted length */
rb_str_set_len(input, out_p - str);
return input;
}
/*
 * Non-Ripper build: strips the indentation width recorded in
 * heredoc_indent from the string literals of a squiggly heredoc body.
 *
 * Does nothing when heredoc_indent is not positive. Otherwise the literal
 * of `root` is dedented first (as the first fragment), then the nd_next
 * chain is followed while its nodes are NODE_ARRAY, and every head that is
 * a NODE_STR or NODE_DSTR is dedented as a continuation fragment.
 *
 * Heads of any other node type are skipped. str_node is reset after such a
 * head so that, when the chain ends on one, the outer loop terminates
 * instead of reading that node's nd_lit as if it were a string.
 */
static void
parser_heredoc_dedent(struct parser_params *parser, NODE *root)
{
    NODE *node, *str_node;
    int first = TRUE;
    int indent = heredoc_indent;

    if (indent <= 0) return;
    node = str_node = root;

    while (str_node) {
	VALUE lit = str_node->nd_lit;
	if (NIL_P(parser_heredoc_dedent_string(lit, indent, first)))
	    compile_error(PARSER_ARG "dedent failure: %d: %"PRIsVALUE, indent, lit);
	first = FALSE;

	/* find the next string fragment, if any */
	str_node = 0;
	while ((node = node->nd_next) != 0 && nd_type(node) == NODE_ARRAY) {
	    if ((str_node = node->nd_head) != 0) {
		enum node_type type = nd_type(str_node);
		if (type == NODE_STR || type == NODE_DSTR) break;
		/* not a string fragment: must not leak into the outer loop */
		str_node = 0;
	    }
	}
    }
}
#else /* RIPPER */
/*
 * Ripper build: instead of editing strings here, dispatches a
 * heredoc_dedent event carrying the parsed contents and the indentation
 * width, so the Ruby-side handler performs the dedent. Nothing is
 * dispatched when heredoc_indent is not positive.
 */
static void
parser_heredoc_dedent(struct parser_params *parser, VALUE array)
{
if (heredoc_indent <= 0) return;
dispatch2(heredoc_dedent, array, INT2NUM(heredoc_indent));
}
/*
 * Implementation of Ripper's dedent_string(input, width).
 *
 * Removes up to `width` columns of leading spaces/tabs from the start of
 * `input`, modifying the string in place, and returns the number of bytes
 * removed as an Integer. Only the beginning of the string is examined;
 * later lines inside `input` are not touched.
 */
static VALUE
parser_dedent_string(VALUE self, VALUE input, VALUE width)
{
char *str;
long len;
int wid, col;
StringValue(input);
wid = NUM2UINT(width);
/* the buffer is edited in place below */
rb_str_modify(input);
RSTRING_GETMEM(input, str, len);
/* col = number of leading bytes to drop */
col = dedent_pos(str, len, wid);
MEMMOVE(str, str + col, char, len - col);
rb_str_set_len(input, len - col);
return INT2NUM(col);
}
#endif
static int
parser_whole_match_p(struct parser_params *parser,
const char *eos, long len, int indent)
@ -6685,7 +6842,15 @@ parser_here_document(struct parser_params *parser, NODE *here)
}
if (!(func & STR_FUNC_EXPAND)) {
int end = 0;
do {
#ifdef RIPPER
if (end && heredoc_indent > 0) {
set_yylval_str(str);
flush_string_content(enc);
return tSTRING_CONTENT;
}
#endif
p = RSTRING_PTR(lex_lastline);
pend = lex_pend;
if (pend > p) {
@ -6712,7 +6877,7 @@ parser_here_document(struct parser_params *parser, NODE *here)
}
goto error;
}
} while (!whole_match_p(eos, len, indent));
} while (!(end = whole_match_p(eos, len, indent)));
}
else {
/* int mb = ENC_CODERANGE_7BIT, *mbp = &mb;*/
@ -6730,11 +6895,20 @@ parser_here_document(struct parser_params *parser, NODE *here)
goto restore;
}
if (c != '\n') {
#ifdef RIPPER
flush:
#endif
set_yylval_str(STR_NEW3(tok(), toklen(), enc, func));
flush_string_content(enc);
return tSTRING_CONTENT;
}
tokadd(nextc());
#ifdef RIPPER
if (c == '\n' && heredoc_indent > 0) {
lex_goto_eol(parser);
goto flush;
}
#endif
/* if (mbp && mb == ENC_CODERANGE_UNKNOWN) mbp = 0;*/
if ((c = nextc()) == -1) goto error;
} while (!whole_match_p(eos, len, indent));
@ -11294,6 +11468,9 @@ InitVM_ripper(void)
rb_define_method(rb_mKernel, "validate_object", ripper_validate_object, 1);
#endif
rb_define_singleton_method(Ripper, "dedent_string", parser_dedent_string, 2);
rb_define_private_method(Ripper, "dedent_string", parser_dedent_string, 2);
ripper_init_eventids1_table(Ripper);
ripper_init_eventids2_table(Ripper);

View File

@ -431,6 +431,19 @@ class TestRipper::ParserEvents < Test::Unit::TestCase
assert_equal("heredoc1\nheredoc2\n", heredoc, bug1921)
end
def test_heredoc_dedent
thru_heredoc_dedent = false
str = width = nil
tree = parse("<""<~EOS\n heredoc\nEOS\n", :on_heredoc_dedent) {|e, s, w|
thru_heredoc_dedent = true
str = s
width = w
}
assert_equal true, thru_heredoc_dedent
assert_match(/string_content\(\), heredoc\n/, tree)
assert_equal(1, width)
end
def test_massign
thru_massign = false
parse("a, b = 1, 2", :on_massign) {thru_massign = true}

View File

@ -38,6 +38,27 @@ class TestRipper::Sexp < Test::Unit::TestCase
assert_equal "foo\n", search_sexp(:@tstring_content, sexp)[1]
end
def test_squiggly_heredoc
sexp = Ripper.sexp("<<~eot\n asdf\neot")
assert_equal "asdf\n", search_sexp(:@tstring_content, sexp)[1]
end
def test_squiggly_heredoc_with_interpolated_expression
sexp1 = Ripper.sexp(<<-eos)
<<-eot
a\#{1}z
eot
eos
sexp2 = Ripper.sexp(<<-eos)
<<~eot
a\#{1}z
eot
eos
assert_equal clear_pos(sexp1), clear_pos(sexp2)
end
def search_sexp(sym, sexp)
return sexp if !sexp or sexp[0] == sym
sexp.find do |e|
@ -46,4 +67,18 @@ class TestRipper::Sexp < Test::Unit::TestCase
end
end
end
# Blanks out position information in an S-expression so that two trees can
# be compared regardless of where their tokens were located.
#
# Walks +sexp+ recursively. A three-element array whose last element is a
# pair of Integers is treated as a token, and that pair is emptied in
# place; every other nested array is descended into. Returns +sexp+
# (nil/false is returned as is).
def clear_pos(sexp)
  return sexp unless sexp
  sexp.each do |node|
    next unless Array === node
    tail = node.last
    token = node.size == 3 && Array === tail && tail.size == 2 &&
            Integer === tail[0] && Integer === tail[1]
    if token
      tail.clear
    else
      clear_pos(node)
    end
  end
end
end if ripper_test

View File

@ -475,6 +475,94 @@ e"
assert_equal(expected, actual, "#{Bug7559}: ")
end
def test_dedented_heredoc_without_indentation
assert_equal(" y\nz\n", <<~eos)
y
z
eos
end
def test_dedented_heredoc_with_indentation
assert_equal(" a\nb\n", <<~eos)
a
b
eos
end
def test_dedented_heredoc_with_blank_less_indented_line
# the blank line has two leading spaces
result = eval("<<~eos\n" \
" a\n" \
" \n" \
" b\n" \
" eos\n")
assert_equal("a\n\nb\n", result)
end
def test_dedented_heredoc_with_blank_less_indented_line_escaped
result = eval("<<~eos\n" \
" a\n" \
"\\ \\ \n" \
" b\n" \
" eos\n")
assert_equal(" a\n \n b\n", result)
end
def test_dedented_heredoc_with_blank_more_indented_line
# the blank line has six leading spaces
result = eval("<<~eos\n" \
" a\n" \
" \n" \
" b\n" \
" eos\n")
assert_equal("a\n \nb\n", result)
end
def test_dedented_heredoc_with_blank_more_indented_line_escaped
result = eval("<<~eos\n" \
" a\n" \
"\\ \\ \\ \\ \\ \\ \n" \
" b\n" \
" eos\n")
assert_equal(" a\n \n b\n", result)
end
def test_dedented_heredoc_with_empty_line
result = eval("<<~eos\n" \
" This would contain specially formatted text.\n" \
"\n" \
" That might span many lines\n" \
" eos\n")
assert_equal(<<-eos, result)
This would contain specially formatted text.
That might span many lines
eos
end
def test_dedented_heredoc_with_interpolated_expression
result = eval(" <<~eos\n" \
" #{1}a\n" \
" zy\n" \
" eos\n")
assert_equal(<<-eos, result)
#{1}a
zy
eos
end
def test_dedented_heredoc_with_interpolated_string
w = ""
result = eval("<<~eos\n" \
" \#{w} a\n" \
" zy\n" \
" eos\n")
assert_equal(<<-eos, result)
#{w} a
zy
eos
end
def test_lineno_after_heredoc
bug7559 = '[ruby-dev:46737]'
expected, _, actual = __LINE__, <<eom, __LINE__