parse.y: indented heredoc

* parse.y: add heredoc <<~ syntax. [Feature #9098] git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@52916 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2015-12-07 14:39:52 +00:00 · 2015-12-07 14:39:52 +00:00 · 9a28a29b87
commit 9a28a29b87
parent 9f51e95fc1
8 changed files with 412 additions and 12 deletions
--- a/4
+++ b/4
@ -1,3 +1,7 @@
 Mon Dec  7 23:39:49 2015  Ben Miller  <bjmllr@gmail.com>
 	* parse.y: add heredoc <<~ syntax.  [Feature #9098]
 Mon Dec  7 23:06:16 2015  Kazuhiro NISHIYAMA  <zn@mbf.nifty.com>
 	* prelude.rb (IO#read_nonblock): [DOC] add missing options to
--- a/doc/syntax/literals.rdoc
+++ b/doc/syntax/literals.rdoc
@ -196,6 +196,20 @@ Note that while the closing identifier may be indented, the content is
 always treated as if it is flush left.  If you indent the content those spaces
 will appear in the output.
 To have indented content as well as an indented closing identifier, you can use
 a "squiggly" heredoc, which uses a "~" instead of a "-" after <tt><<</tt>:
    expected_result = <<~SQUIGGLY_HEREDOC
      This would contain specially formatted text.
      That might span many lines
    SQUIGGLY_HEREDOC
 The indentation of the least-indented line will be removed from each line of
 the content.  Note that empty lines and lines consisting solely of literal tabs
 and spaces will be ignored for the purposes of determining indentation, but
 escaped tabs and spaces are considered non-indentation characters.
 A heredoc allows interpolation and escaped characters.  You may disable
 interpolation and escaping by surrounding the opening identifier with single
 quotes:
--- a/ext/ripper/lib/ripper/lexer.rb
+++ b/ext/ripper/lib/ripper/lexer.rb
@ -44,28 +44,56 @@ class Ripper
  end
  class Lexer < ::Ripper   #:nodoc: internal use only
    Elem = Struct.new(:pos, :event, :tok)
    def tokenize
-      lex().map {|pos, event, tok| tok }
+      parse().sort_by(&:pos).map(&:tok)
    end
    def lex
-      parse().sort_by {|pos, event, tok| pos }
+      parse().sort_by(&:pos).map(&:to_a)
    end
    # Runs the underlying parser with fresh token and heredoc-nesting
    # buffers, then returns the collected tokens as one flat list (nested
    # heredoc buffers are flattened in place).
    def parse
      @stack = []
      @buf = []
      super
      @buf.tap(&:flatten!)
    end
    private
-    def _push_token(tok)
+    def on_heredoc_dedent(v, w)
-      @buf.push [[lineno(), column()], __callee__, tok]
+      @buf.each do |e|
        if e.event == :on_tstring_content
          if (n = dedent_string(e.tok, w)) > 0
            e.pos[1] += n
          end
        end
      end
      v
    end
-    SCANNER_EVENTS.each do |event|
+    def on_heredoc_beg(tok)
-      alias_method "on_#{event}", :_push_token
+      @stack.push @buf
      buf = []
      @buf << buf
      @buf = buf
      @buf.push Elem.new([lineno(), column()], __callee__, tok)
    end
    # Scanner event for a heredoc terminator: records the token in the
    # current buffer, then makes the buffer most recently saved on @stack
    # (pushed by on_heredoc_beg) the current one again.
    def on_heredoc_end(tok)
      @buf.push Elem.new([lineno(), column()], __callee__, tok)
      @buf = @stack.pop
    end
    # Generic scanner-event handler: appends an Elem holding the current
    # [lineno, column], the event name and the token text to @buf.  The
    # method is aliased to the on_* scanner events, and __callee__ supplies
    # the name under which it was actually invoked.
    def _push_token(tok)
      @buf.push Elem.new([lineno(), column()], __callee__, tok)
    end
    # Alias _push_token to every scanner event that does not already have a
    # dedicated private handler defined directly in this class.
    (SCANNER_EVENTS.map {|event|:"on_#{event}"} - private_instance_methods(false)).each do |event|
      alias_method event, :_push_token
    end
  end
--- a/ext/ripper/lib/ripper/sexp.rb
+++ b/ext/ripper/lib/ripper/sexp.rb
@ -62,7 +62,35 @@ class Ripper
  class SexpBuilder < ::Ripper   #:nodoc:
    private
-    PARSER_EVENTS.each do |event|
+    def dedent_element(e, width)
      if (n = dedent_string(e[1], width)) > 0
        e[2][1] += n
      end
      e
    end
    # Parser event for a squiggly heredoc: strips +width+ columns of
    # indentation from the string contents collected in +val+, in place, and
    # returns +val+.
    #
    # Bare strings are trimmed with dedent_string, [:@tstring_content, ...]
    # elements go through dedent_element, and for an element whose tag ends
    # in "_add" the walk continues into its second slot.
    # NOTE(review): only slot 1 of a *_add element is descended into; its
    # remaining slots are not visited from here -- confirm that is intended.
    def on_heredoc_dedent(val, width)
      walk = proc do |nodes|
        nodes.map! do |node|
          case node
          when String
            dedent_string(node, width)
          when Array
            tag = node[0]
            if :@tstring_content === tag
              node = dedent_element(node, width)
            elsif /_add\z/ === tag
              node[1] = walk[node[1]]
            end
          end
          node
        end
      end
      walk[val]
      val
    end
    events = private_instance_methods(false).grep(/\Aon_/) {$'.to_sym}
    (PARSER_EVENTS - events).each do |event|
      module_eval(<<-End, __FILE__, __LINE__ + 1)
        def on_#{event}(*args)
          args.unshift :#{event}
@ -83,6 +111,19 @@ class Ripper
  class SexpBuilderPP < SexpBuilder #:nodoc:
    private
    # Parser event for a squiggly heredoc on the flat list built by this
    # class: strips +width+ columns of indentation from every bare string and
    # every [:@tstring_content, ...] element of +val+, in place.  Symbols and
    # all other elements are passed through untouched.  Returns +val+.
    def on_heredoc_dedent(val, width)
      val.map! do |item|
        case item
        when Symbol
          item
        when String
          dedent_string(item, width)
          item
        when Array
          item[0] == :@tstring_content ? dedent_element(item, width) : item
        else
          item
        end
      end
      val
    end
    def _dispatch_event_new
      []
    end
--- a/parse.y
+++ b/parse.y
@ -257,6 +257,8 @@ struct parser_params {
    int toksiz;
    int tokline;
    int heredoc_end;
    int heredoc_indent;
    int heredoc_line_indent;
    char *tokenbuf;
    NODE *deferred_nodes;
    struct local_vars *lvtbl;
@ -347,6 +349,8 @@ static int parser_yyerror(struct parser_params*, const char*);
 #define lex_p			(parser->lex.pcur)
 #define lex_pend		(parser->lex.pend)
 #define heredoc_end		(parser->heredoc_end)
 #define heredoc_indent		(parser->heredoc_indent)
 #define heredoc_line_indent	(parser->heredoc_line_indent)
 #define command_start		(parser->command_start)
 #define deferred_nodes		(parser->deferred_nodes)
 #define lex_gets_ptr		(parser->lex.gets_ptr)
@ -487,6 +491,9 @@ static int reg_fragment_check_gen(struct parser_params*, VALUE, int);
 static NODE *reg_named_capture_assign_gen(struct parser_params* parser, VALUE regexp, NODE *match);
 #define reg_named_capture_assign(regexp,match) reg_named_capture_assign_gen(parser,(regexp),(match))
 static void parser_heredoc_dedent(struct parser_params*,NODE*);
 # define heredoc_dedent(str) parser_heredoc_dedent(parser, (str))
 #define get_id(id) (id)
 #define get_value(val) (val)
 #else
@ -670,6 +677,9 @@ new_args_tail_gen(struct parser_params *parser, VALUE k, VALUE kr, VALUE b)
 #define new_defined(expr) dispatch1(defined, (expr))
 static void parser_heredoc_dedent(struct parser_params*,VALUE);
 # define heredoc_dedent(str) parser_heredoc_dedent(parser, (str))
 #define FIXME 0
 #endif /* RIPPER */
@ -3887,6 +3897,7 @@ strings		: string
 			else {
 			    node = evstr2dstr(node);
 			}
 			heredoc_indent = 0;
 			$$ = node;
 		    /*%
 			$$ = $1;
@ -3908,6 +3919,7 @@ string		: tCHAR
 string1		: tSTRING_BEG string_contents tSTRING_END
 		    {
 			heredoc_dedent($2);
 		    /*%%%*/
 			$$ = $2;
 		    /*%
@ -3920,6 +3932,10 @@ xstring		: tXSTRING_BEG xstring_contents tSTRING_END
 		    {
 		    /*%%%*/
 			NODE *node = $2;
 		    /*%
 		    %*/
 			heredoc_dedent($2);
 		    /*%%%*/
 			if (!node) {
 			    node = NEW_XSTR(STR_NEW0());
 			}
@ -4319,6 +4335,10 @@ string_content	: tSTRING_CONTENT
 			$<num>$ = brace_nest;
 			brace_nest = 0;
 		    }
 		    {
 			$<num>$ = heredoc_indent;
 			heredoc_indent = 0;
 		    }
 		  compstmt tSTRING_DEND
 		    {
 			cond_stack = $<val>1;
@ -4326,11 +4346,13 @@ string_content	: tSTRING_CONTENT
 			lex_strterm = $<node>3;
 			lex_state = $<num>4;
 			brace_nest = $<num>5;
 			heredoc_indent = $<num>6;
 			heredoc_line_indent = -1;
 		    /*%%%*/
-			if ($6) $6->flags &= ~NODE_FL_NEWLINE;
+			if ($7) $7->flags &= ~NODE_FL_NEWLINE;
-			$$ = new_evstr($6);
+			$$ = new_evstr($7);
 		    /*%
-			$$ = dispatch1(string_embexpr, $6);
+			$$ = dispatch1(string_embexpr, $7);
 		    %*/
 		    }
 		;
@ -6204,6 +6226,27 @@ parser_tokadd_string(struct parser_params *parser,
    } while (0)
    while ((c = nextc()) != -1) {
 	if (heredoc_indent > 0) {
 	    if (heredoc_line_indent == -1) {
 		if (c == '\n') heredoc_line_indent = 0;
 	    }
 	    else {
 		if (c == ' ') {
 		    heredoc_line_indent++;
 		}
 		else if (c == '\t') {
 		    int w = (heredoc_line_indent / TAB_WIDTH) + 1;
 		    heredoc_line_indent = w * TAB_WIDTH;
 		}
 		else if (c != '\n') {
 		    if (heredoc_indent > heredoc_line_indent) {
 			heredoc_indent = heredoc_line_indent;
 		    }
 		    heredoc_line_indent = -1;
 		}
 	    }
 	}
 	if (paren && c == paren) {
 	    ++*nest;
 	}
@ -6465,6 +6508,12 @@ parser_heredoc_identifier(struct parser_params *parser)
 	c = nextc();
 	func = STR_FUNC_INDENT;
    }
    else if (c == '~') {
 	c = nextc();
 	func = STR_FUNC_INDENT;
 	heredoc_indent = INT_MAX;
 	heredoc_line_indent = 0;
    }
    switch (c) {
      case '\'':
 	func |= str_squote; goto quoted;
@ -6489,7 +6538,7 @@ parser_heredoc_identifier(struct parser_params *parser)
 	if (!parser_is_identchar()) {
 	    pushback(c);
 	    if (func & STR_FUNC_INDENT) {
-		pushback('-');
+		pushback(heredoc_indent > 0 ? '~' : '-');
 	    }
 	    return 0;
 	}
@ -6534,6 +6583,114 @@ parser_heredoc_restore(struct parser_params *parser, NODE *here)
    ripper_flush(parser);
 }
 /*
  * Returns how many leading bytes of str (at most len) form indentation
  * that fits within width columns.  A space counts as one column and a tab
  * advances to the next multiple of TAB_WIDTH.  Scanning stops at the first
  * byte that is neither a space nor a tab.
  */
 static int
 dedent_pos(const char *str, long len, int width)
 {
    int i, col = 0;
    for (i = 0; i < len && col < width; i++) {
 	if (str[i] == ' ') {
 	    col++;
 	}
 	else if (str[i] == '\t') {
 	    int n = TAB_WIDTH * (col / TAB_WIDTH + 1);
 	    /* a tab that would overshoot width is left in place */
 	    if (n > width) break;
 	    col = n;
 	}
 	else {
 	    break;
 	}
    }
    return i;
 }
 #ifndef RIPPER
 /*
  * Removes up to width columns of leading indentation from the lines of
  * input, editing the string buffer in place, and returns input.
  *
  * When first is false the text up to and including the first newline is
  * kept as is (and the string is returned unchanged if it has no newline);
  * dedenting then starts with the line after it.
  */
 static VALUE
 parser_heredoc_dedent_string(VALUE input, int width, int first)
 {
    long len;
    int col;
    char *str, *p, *out_p, *end, *t;
    RSTRING_GETMEM(input, str, len);
    end = &str[len];
    p = str;
    if (!first) {
 	p = memchr(p, '\n', end - p);
 	if (!p) return input;
 	p++;
    }
    /* p reads the original text, out_p writes the compacted text */
    out_p = p;
    while (p < end) {
 	/* skip this line's indentation */
 	col = dedent_pos(p, end - p, width);
 	p += col;
 	/* t points just past this line's newline, or at end if there is none */
 	if (!(t = memchr(p, '\n', end - p)))
 	    t = end;
 	else
 	    ++t;
 	/* nothing to move while no byte has been dropped yet */
 	if (p > out_p) memmove(out_p, p, t - p);
 	out_p += t - p;
 	p = t;
    }
    rb_str_set_len(input, out_p - str);
    return input;
 }
 /*
  * Non-ripper build: dedents the string literals of a heredoc node list by
  * heredoc_indent columns.  Does nothing when heredoc_indent is not
  * positive.
  *
  * root's literal is dedented from its very first byte; every later string
  * node is dedented only after its first newline (first == FALSE).
  */
 static void
 parser_heredoc_dedent(struct parser_params *parser, NODE *root)
 {
    NODE *node, *str_node;
    int first = TRUE;
    int indent = heredoc_indent;
    if (indent <= 0) return;
    node = str_node = root;
    while (str_node) {
 	VALUE lit = str_node->nd_lit;
 	if (NIL_P(parser_heredoc_dedent_string(lit, indent, first)))
 	    compile_error(PARSER_ARG "dedent failure: %d: %"PRIsVALUE, indent, lit);
 	first = FALSE;
 	str_node = 0;
 	/* advance along nd_next to the next NODE_STR / NODE_DSTR head */
 	/* NOTE(review): if the list runs out right after a non-string head, */
 	/* str_node still points at that head -- confirm this cannot happen */
 	while ((node = node->nd_next) != 0 && nd_type(node) == NODE_ARRAY) {
 	    if ((str_node = node->nd_head) != 0) {
 		enum node_type type = nd_type(str_node);
 		if (type == NODE_STR || type == NODE_DSTR) break;
 	    }
 	}
    }
 }
 #else /* RIPPER */
 /*
  * Ripper build: instead of editing strings here, dispatches the
  * heredoc_dedent parser event with the collected contents and the indent
  * width.  Does nothing when heredoc_indent is not positive.
  */
 static void
 parser_heredoc_dedent(struct parser_params *parser, VALUE array)
 {
    if (heredoc_indent <= 0) return;
    dispatch2(heredoc_dedent, array, INT2NUM(heredoc_indent));
 }
 /*
  * Implementation of the dedent_string(input, width) method: removes up to
  * width columns of leading indentation from the start of input, modifying
  * the string in place, and returns the number of bytes removed as an
  * Integer.  Only the beginning of the string is examined.
  */
 static VALUE
 parser_dedent_string(VALUE self, VALUE input, VALUE width)
 {
    char *str;
    long len;
    int wid, col;
    StringValue(input);
    wid = NUM2UINT(width);
    rb_str_modify(input);
    RSTRING_GETMEM(input, str, len);
    col = dedent_pos(str, len, wid);
    /* shift the remaining bytes to the front, then shrink the string */
    MEMMOVE(str, str + col, char, len - col);
    rb_str_set_len(input, len - col);
    return INT2NUM(col);
 }
 #endif
 static int
 parser_whole_match_p(struct parser_params *parser,
    const char *eos, long len, int indent)
@ -6685,7 +6842,15 @@ parser_here_document(struct parser_params *parser, NODE *here)
    }
    if (!(func & STR_FUNC_EXPAND)) {
 	int end = 0;
 	do {
 #ifdef RIPPER
 	    if (end && heredoc_indent > 0) {
 		set_yylval_str(str);
 		flush_string_content(enc);
 		return tSTRING_CONTENT;
 	    }
 #endif
 	    p = RSTRING_PTR(lex_lastline);
 	    pend = lex_pend;
 	    if (pend > p) {
@ -6712,7 +6877,7 @@ parser_here_document(struct parser_params *parser, NODE *here)
 		}
 		goto error;
 	    }
-	} while (!whole_match_p(eos, len, indent));
+	} while (!(end = whole_match_p(eos, len, indent)));
    }
    else {
 	/*	int mb = ENC_CODERANGE_7BIT, *mbp = &mb;*/
@ -6730,11 +6895,20 @@ parser_here_document(struct parser_params *parser, NODE *here)
 		goto restore;
 	    }
 	    if (c != '\n') {
 #ifdef RIPPER
 	      flush:
 #endif
 		set_yylval_str(STR_NEW3(tok(), toklen(), enc, func));
 		flush_string_content(enc);
 		return tSTRING_CONTENT;
 	    }
 	    tokadd(nextc());
 #ifdef RIPPER
 	    if (c == '\n' && heredoc_indent > 0) {
 		lex_goto_eol(parser);
 		goto flush;
 	    }
 #endif
 	    /*	    if (mbp && mb == ENC_CODERANGE_UNKNOWN) mbp = 0;*/
 	    if ((c = nextc()) == -1) goto error;
 	} while (!whole_match_p(eos, len, indent));
@ -11294,6 +11468,9 @@ InitVM_ripper(void)
    rb_define_method(rb_mKernel, "validate_object", ripper_validate_object, 1);
 #endif
    rb_define_singleton_method(Ripper, "dedent_string", parser_dedent_string, 2);
    rb_define_private_method(Ripper, "dedent_string", parser_dedent_string, 2);
    ripper_init_eventids1_table(Ripper);
    ripper_init_eventids2_table(Ripper);
--- a/test/ripper/test_parser_events.rb
+++ b/test/ripper/test_parser_events.rb
@ -431,6 +431,19 @@ class TestRipper::ParserEvents < Test::Unit::TestCase
    assert_equal("heredoc1\nheredoc2\n", heredoc, bug1921)
  end
  # A squiggly heredoc whose only line is indented by one space must fire
  # the heredoc_dedent parser event with width 1.
  def test_heredoc_dedent
    thru_heredoc_dedent = false
    str = width = nil
    # NOTE(review): "<""<~EOS" is presumably split so that this file does not
    # itself contain a literal heredoc opener -- confirm.
    tree = parse("<""<~EOS\n heredoc\nEOS\n", :on_heredoc_dedent) {|e, s, w|
      thru_heredoc_dedent = true
      str = s
      width = w
    }
    assert_equal true, thru_heredoc_dedent
    assert_match(/string_content\(\), heredoc\n/, tree)
    assert_equal(1, width)
  end
  def test_massign
    thru_massign = false
    parse("a, b = 1, 2", :on_massign) {thru_massign = true}
--- a/test/ripper/test_sexp.rb
+++ b/test/ripper/test_sexp.rb
@ -38,6 +38,27 @@ class TestRipper::Sexp < Test::Unit::TestCase
    assert_equal "foo\n", search_sexp(:@tstring_content, sexp)[1]
  end
  # Ripper.sexp must report the content of a squiggly heredoc with its
  # leading indentation already removed.
  def test_squiggly_heredoc
    sexp = Ripper.sexp("<<~eot\n      asdf\neot")
    assert_equal "asdf\n", search_sexp(:@tstring_content, sexp)[1]
  end
  # Compares the tree of a <<~ heredoc containing an interpolation with the
  # tree of the corresponding <<- heredoc; apart from source positions
  # (blanked by clear_pos) the two must be equal.
  def test_squiggly_heredoc_with_interpolated_expression
    sexp1 = Ripper.sexp(<<-eos)
 <<-eot
 a\#{1}z
 eot
    eos
    sexp2 = Ripper.sexp(<<-eos)
 <<~eot
  a\#{1}z
 eot
    eos
    assert_equal clear_pos(sexp1), clear_pos(sexp2)
  end
  def search_sexp(sym, sexp)
    return sexp if !sexp or sexp[0] == sym
    sexp.find do |e|
@ -46,4 +67,18 @@ class TestRipper::Sexp < Test::Unit::TestCase
      end
    end
  end
  # Recursively blanks source positions inside +sexp+ so that two trees can
  # be compared by shape and text only.  A three-element array whose last
  # element is a pair of Integers has that pair emptied in place; any other
  # nested array is walked.  Returns +sexp+ (or the falsy value it was
  # given).
  def clear_pos(sexp)
    return sexp unless sexp
    sexp.each do |node|
      next unless Array === node
      pos = node.last
      positioned = node.size == 3 && Array === pos && pos.size == 2 &&
                   Integer === pos[0] && Integer === pos[1]
      if positioned
        pos.clear
      else
        clear_pos(node)
      end
    end
  end
 end if ripper_test
--- a/test/ruby/test_syntax.rb
+++ b/test/ruby/test_syntax.rb
@ -475,6 +475,94 @@ e"
    assert_equal(expected, actual, "#{Bug7559}: ")
  end
  def test_dedented_heredoc_without_indentation
    assert_equal(" y\nz\n", <<~eos)
 y
 z
    eos
  end
  def test_dedented_heredoc_with_indentation
    assert_equal(" a\nb\n", <<~eos)
     a
    b
    eos
  end
  def test_dedented_heredoc_with_blank_less_indented_line
    # the blank line has two leading spaces
    result = eval("<<~eos\n" \
                  "    a\n" \
                  "  \n" \
                  "    b\n" \
                  "    eos\n")
    assert_equal("a\n\nb\n", result)
  end
  def test_dedented_heredoc_with_blank_less_indented_line_escaped
    result = eval("<<~eos\n" \
                  "    a\n" \
                  "\\ \\ \n" \
                  "    b\n" \
                  "    eos\n")
    assert_equal("    a\n  \n    b\n", result)
  end
  def test_dedented_heredoc_with_blank_more_indented_line
    # the blank line has six leading spaces
    result = eval("<<~eos\n" \
                  "    a\n" \
                  "      \n" \
                  "    b\n" \
                  "    eos\n")
    assert_equal("a\n  \nb\n", result)
  end
  def test_dedented_heredoc_with_blank_more_indented_line_escaped
    result = eval("<<~eos\n" \
                  "    a\n" \
                  "\\ \\ \\ \\ \\ \\ \n" \
                  "    b\n" \
                  "    eos\n")
    assert_equal("    a\n      \n    b\n", result)
  end
  def test_dedented_heredoc_with_empty_line
 result = eval("<<~eos\n" \
              "      This would contain specially formatted text.\n" \
              "\n" \
              "      That might span many lines\n" \
              "    eos\n")
    assert_equal(<<-eos, result)
 This would contain specially formatted text.
 That might span many lines
    eos
  end
  def test_dedented_heredoc_with_interpolated_expression
    result = eval(" <<~eos\n" \
                  "  #{1}a\n" \
                  " zy\n" \
                  "      eos\n")
      assert_equal(<<-eos, result)
 #{1}a
 zy
      eos
  end
  def test_dedented_heredoc_with_interpolated_string
    w = ""
    result = eval("<<~eos\n" \
                  " \#{w} a\n" \
                  "  zy\n" \
                  "    eos\n")
    assert_equal(<<-eos, result)
 #{w} a
 zy
    eos
  end
  def test_lineno_after_heredoc
    bug7559 = '[ruby-dev:46737]'
    expected, _, actual = __LINE__, <<eom, __LINE__