Lrama v0.5.7

2023-10-23 09:07:23 +09:00 · 2023-10-23 09:07:23 +09:00 · 2d468358a5
commit 2d468358a5
parent dd9b4851a3
14 changed files with 2224 additions and 812 deletions
--- a/tool/lrama/lib/lrama/command.rb
+++ b/tool/lrama/lib/lrama/command.rb
@ -6,8 +6,9 @@ module Lrama
      Report::Duration.enable if options.trace_opts[:time]
      warning = Lrama::Warning.new
-      grammar = Lrama::Parser.new(options.y.read).parse
+      text = options.y.read
      options.y.close if options.y != STDIN
      grammar = Lrama::Parser.new(text).parse
      states = Lrama::States.new(grammar, warning, trace_state: (options.trace_opts[:automaton] || options.trace_opts[:closure]))
      states.compute
      context = Lrama::Context.new(states)
--- a/tool/lrama/lib/lrama/context.rb
+++ b/tool/lrama/lib/lrama/context.rb
@ -170,7 +170,7 @@ module Lrama
      return a
    end
-    # Mapping from rule number to lenght of RHS.
+    # Mapping from rule number to length of RHS.
    # Dummy rule is appended as the first element whose value is 0
    # because 0 means error in yydefact.
    def yyr2
@ -214,7 +214,7 @@ module Lrama
      (rule_id + 1) * -1
    end
-    # Symbol number is assinged to term first then nterm.
+    # Symbol number is assigned to term first then nterm.
    # This method calculates sequence_number for nterm.
    def nterm_number_to_sequence_number(nterm_number)
      nterm_number - @states.terms.count
@ -259,7 +259,7 @@ module Lrama
          actions[conflict.symbol.number] = ErrorActionNumber
        end
-        # If default_reduction_rule, replase default_reduction_rule in
+        # If default_reduction_rule, replace default_reduction_rule in
        # actions with zero.
        if state.default_reduction_rule
          actions.map! do |e|
@ -272,7 +272,7 @@ module Lrama
        end
        # If no default_reduction_rule, default behavior is an
-        # error then replase ErrorActionNumber with zero.
+        # error then replace ErrorActionNumber with zero.
        if !state.default_reduction_rule
          actions.map! do |e|
            if e == ErrorActionNumber
--- a/tool/lrama/lib/lrama/digraph.rb
+++ b/tool/lrama/lib/lrama/digraph.rb
@ -40,8 +40,7 @@ module Lrama
      end
      if @h[x] == d
-        while true do
+        while (z = @stack.pop) do
          z = @stack.pop
          @h[z] = Float::INFINITY
          break if z == x
          @result[z] = @result[x] # F (Top of S) = F x
--- a/tool/lrama/lib/lrama/grammar.rb
+++ b/tool/lrama/lib/lrama/grammar.rb
@ -1,3 +1,5 @@
 require "strscan"
 require "lrama/grammar/auxiliary"
 require "lrama/grammar/code"
 require "lrama/grammar/error_token"
@ -306,6 +308,188 @@ module Lrama
      @nterms ||= @symbols.select(&:nterm?)
    end
    # Collects the `$...` / `@...` references found in every piece of user code
    # this method visits and hands each code token to build_references.
    #
    # Visited, in order:
    #   1. the initial action, when initial_action is not nil
    #   2. the code of every entry in @printers
    #   3. the code of every entry in @error_tokens
    #   4. every User_code token on the right-hand side of each rule in @_rules
    #
    # Only rule actions (4) skip `/* ... */` comments while scanning; rule
    # action tokens additionally get numberize_references(lhs, rhs) called
    # before build_references.
    #
    # Returns the result of @_rules.each (i.e. @_rules itself).
    def extract_references
      unless initial_action.nil?
        initial_action.token_code.references = scan_code_references(initial_action.s_value)
        build_references(initial_action.token_code)
      end

      # Printers and error tokens are processed identically, printers first.
      [@printers, @error_tokens].each do |code_holders|
        code_holders.each do |holder|
          holder.code.token_code.references = scan_code_references(holder.code.s_value)
          build_references(holder.code.token_code)
        end
      end

      @_rules.each do |lhs, rhs, _|
        rhs.each do |token|
          # Only user code tokens (not symbols or other lexer tokens) carry references.
          next if token.class == Lrama::Grammar::Symbol || token.type != Lrama::Lexer::Token::User_code

          token.references = scan_code_references(token.s_value, skip_comments: true)
          token.numberize_references(lhs, rhs)
          build_references(token)
        end
      end
    end

    # Scans +code+ (a String) from start to end and returns an Array of
    # references, each of the form
    #
    #   [type, value, tag, first_offset, last_offset]
    #
    # * type   - :dollar for `$` references, :at for `@` references
    # * value  - "$", an Integer position, or the referenced name as a String
    # * tag    - a Tag token for `$<tag>...` references, otherwise nil
    # * first_offset / last_offset - StringScanner positions of the first and
    #   last character of the reference within +code+
    #
    # When +skip_comments+ is true, text from `/*` up to and including the
    # next `*/` is not searched for references.
    def scan_code_references(code, skip_comments: false)
      scanner = StringScanner.new(code)
      references = []

      until scanner.eos?
        start = scanner.pos

        case
        # $ references
        # An identifier has to be wrapped in brackets to use "." or "-" in it.
        when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
          references << [:dollar, "$", build_reference_tag(scanner[1]), start, scanner.pos - 1]
        when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
          references << [:dollar, Integer(scanner[2]), build_reference_tag(scanner[1]), start, scanner.pos - 1]
        when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
          references << [:dollar, scanner[2], build_reference_tag(scanner[1]), start, scanner.pos - 1]
        when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[program] (named reference with brackets)
          references << [:dollar, scanner[2], build_reference_tag(scanner[1]), start, scanner.pos - 1]

        # @ references
        # An identifier has to be wrapped in brackets to use "." or "-" in it.
        when scanner.scan(/@\$/) # @$
          references << [:at, "$", nil, start, scanner.pos - 1]
        when scanner.scan(/@(\d+)/) # @1
          references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
        when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
          references << [:at, scanner[1], nil, start, scanner.pos - 1]
        when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
          references << [:at, scanner[1], nil, start, scanner.pos - 1]

        when skip_comments && scanner.scan(/\/\*/)
          # Jump over the comment body; if no "*/" follows, scan_until does not
          # move the scanner and scanning simply continues after the "/*".
          scanner.scan_until(/\*\//)
        else
          scanner.getch
        end
      end

      references
    end

    # Wraps a matched "<tag>" String in a Tag token; returns nil when the
    # reference had no tag.
    def build_reference_tag(tag)
      tag && Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: tag)
    end

    private :scan_code_references, :build_reference_tag
    # Builds a Token of the given type and value, stamps it with the supplied
    # line and column, and returns it.
    def create_token(type, s_value, line, column)
      Token.new(type: type, s_value: s_value).tap do |token|
        token.line = line
        token.column = column
      end
    end
    private
    def find_nterm_by_id!(id)
@ -470,7 +654,9 @@ module Lrama
    # Fill #number and #token_id
    def fill_symbol_number
-      # TODO: why start from 256
+      # Character literal in grammar file has
      # token id corresponding to ASCII code by default,
      # so start token_id from 256.
      token_id = 256
      # YYEMPTY = -2
--- a/tool/lrama/lib/lrama/grammar/union.rb
+++ b/tool/lrama/lib/lrama/grammar/union.rb
@ -2,8 +2,8 @@ module Lrama
  class Grammar
    class Union < Struct.new(:code, :lineno, keyword_init: true)
      def braces_less_code
-        # Remove braces
+        # Braces are already removed by the lexer
-        code.s_value[1..-2]
+        code.s_value
      end
    end
  end
--- a/tool/lrama/lib/lrama/lexer.rb
+++ b/tool/lrama/lib/lrama/lexer.rb
@ -1,346 +1,174 @@
 require "strscan"
 require "lrama/report/duration"
 require "lrama/lexer/token"
 module Lrama
  # Lexer for parse.y
  class Lexer
-    include Lrama::Report::Duration
+    attr_accessor :status
    attr_accessor :end_symbol
-    # States
+    SYMBOLS = %w(%{ %} %% { } \[ \] : \| ;)
-    #
+    PERCENT_TOKENS = %w(
-    # See: https://www.gnu.org/software/bison/manual/html_node/Grammar-Outline.html
+      %union
-    Initial = 0
+      %token
-    Prologue = 1
+      %type
-    BisonDeclarations = 2
+      %left
-    GrammarRules = 3
+      %right
-    Epilogue = 4
+      %nonassoc
-
+      %expect
-    # Token types
+      %define
-
+      %require
-    attr_reader :prologue, :bison_declarations, :grammar_rules, :epilogue,
+      %printer
-                :bison_declarations_tokens, :grammar_rules_tokens
+      %lex-param
      %parse-param
      %initial-action
      %precedence
      %prec
      %error-token
    )
    def initialize(text)
-      @text = text
+      @scanner = StringScanner.new(text)
-      @state = Initial
+      @head = @scanner.pos
-      # Array of texts
+      @line = 1
-      @prologue = []
+      @status = :initial
-      @bison_declarations = []
+      @end_symbol = nil
-      @grammar_rules = []
+    end
      @epilogue = []
-      @bison_declarations_tokens = []
+    def next_token
-      @grammar_rules_tokens = []
+      case @status
-
+      when :initial
-      @debug = false
+        lex_token
-
+      when :c_declaration
-      report_duration(:lex) do
+        lex_c_code
        lex_text
        lex_bison_declarations_tokens
        lex_grammar_rules_tokens
      end
    end
    # Returns the value of @line, the line counter this lexer maintains
    # while scanning.
    def line
      @line
    end
    # Returns the distance between the scanner's current position and @head.
    # NOTE(review): @head appears to mark where the current line starts —
    # confirm against the code that updates it.
    def column
      @scanner.pos - @head
    end
    # Reads the next grammar-level token from @scanner.
    #
    # Leading newlines, whitespace, `/* */` comments, `//` comments and
    # `%empty` are consumed first. The line/column at which the token starts
    # are then remembered in @head_line / @head_column.
    #
    # Returns nil at end of input, otherwise a two-element Array
    # [token_type, value]:
    #   * symbols and %-directives: the matched text is used for both elements
    #   * :TAG, :CHARACTER, :IDENTIFIER, :IDENT_COLON: value is built by build_token
    #   * :STRING: value is the quoted String
    #   * :INTEGER: value is an Integer
    #
    # Raises (a bare `raise`) when the input at the current position matches
    # none of the known token patterns.
    def lex_token
      # Skip everything that does not produce a token.
      while !@scanner.eos? do
        case
        when @scanner.scan(/\n/)
          newline
        when @scanner.scan(/\s+/)
          # noop
        when @scanner.scan(/\/\*/)
          lex_comment
        when @scanner.scan(/\/\//)
          # Line comment: drop the rest of the line, then account for the line break.
          @scanner.scan_until(/\n/)
          newline
        when @scanner.scan(/%empty/)
          # noop
        else
          break
        end
      end
      # Remember where the upcoming token starts.
      @head_line = line
      @head_column = column
      # The order of the branches below matters: earlier patterns win.
      case
      when @scanner.eos?
        return
      when @scanner.scan(/#{SYMBOLS.join('|')}/)
        return [@scanner.matched, @scanner.matched]
      when @scanner.scan(/#{PERCENT_TOKENS.join('|')}/)
        return [@scanner.matched, @scanner.matched]
      when @scanner.scan(/<\w+>/)
        return [:TAG, build_token(type: Token::Tag, s_value: @scanner.matched)]
      when @scanner.scan(/'.'/)
        return [:CHARACTER, build_token(type: Token::Char, s_value: @scanner.matched)]
      when @scanner.scan(/'\\\\'|'\\b'|'\\t'|'\\f'|'\\r'|'\\n'|'\\v'|'\\13'/)
        # Escaped character literals.
        return [:CHARACTER, build_token(type: Token::Char, s_value: @scanner.matched)]
      when @scanner.scan(/"/)
        # The opening quote is already consumed, so put it back in front of the rest.
        return [:STRING, %Q("#{@scanner.scan_until(/"/)})]
      when @scanner.scan(/\d+/)
        return [:INTEGER, Integer(@scanner.matched)]
      when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
        token = build_token(type: Token::Ident, s_value: @scanner.matched)
        # Look ahead (without consuming) for an optional "[name]" followed by
        # ":" to tell a rule's left-hand side from an ordinary identifier.
        type =
          if @scanner.check(/\s*(\[\s*[a-zA-Z_.][-a-zA-Z0-9_.]*\s*\])?\s*:/)
            :IDENT_COLON
          else
            :IDENTIFIER
          end
        return [type, token]
      else
        raise
      end
    end
    # Accumulates raw C code from @scanner until the terminator stored in
    # @end_symbol is reached, and returns
    # [:C_DECLARATION, <User_code token with empty references>].
    #
    # Braces are counted so that a "}" only terminates the code when it is
    # not nested and @end_symbol is '}'. The terminator itself is left in
    # the input (via unscan / check) for the caller to read.
    #
    # Raises (a bare `raise`) when the input ends before the terminator.
    #
    # NOTE(review): quoted sections are skipped with scan_until on the next
    # quote character, so escaped quotes inside a literal are not treated
    # specially, and a missing closing '"' makes scan_until return nil, in
    # which case `matched.count` would fail — confirm whether that is acceptable.
    def lex_c_code
      nested = 0
      code = ''
      while !@scanner.eos? do
        case
        when @scanner.scan(/{/)
          code += @scanner.matched
          nested += 1
        when @scanner.scan(/}/)
          if nested == 0 && @end_symbol == '}'
            # Closing brace of the whole block: put it back and finish.
            @scanner.unscan
            return [:C_DECLARATION, build_token(type: Token::User_code, s_value: code, references: [])]
          else
            code += @scanner.matched
            nested -= 1
          end
        when @scanner.check(/#{@end_symbol}/)
          # Terminator ahead (not consumed).
          return [:C_DECLARATION, build_token(type: Token::User_code, s_value: code, references: [])]
        when @scanner.scan(/\n/)
          code += @scanner.matched
          newline
        when @scanner.scan(/"/)
          # Copy a double-quoted section verbatim, counting the lines it spans.
          matched = @scanner.scan_until(/"/)
          code += %Q("#{matched})
          @line += matched.count("\n")
        when @scanner.scan(/'/)
          # Copy a single-quoted section verbatim.
          matched = @scanner.scan_until(/'/)
          code += %Q('#{matched})
        else
          code += @scanner.getch
        end
      end
      raise
    end
    private
-    def create_token(type, s_value, line, column)
+    def lex_comment
-      t = Token.new(type: type, s_value: s_value)
+      while !@scanner.eos? do
      t.line = line
      t.column = column
      return t
    end
    # TODO: Remove this
    #
    # Splits @text line by line into the sections of a grammar file, driven
    # by @state. Each collected entry is a [string, lineno] pair with lineno
    # starting at 1:
    #   * lines between "%{" and "%}" go to @prologue
    #   * lines after "%}" up to the first "%%" go to @bison_declarations
    #   * lines between the first and second "%%" go to @grammar_rules
    #   * everything after the second "%%" goes to @epilogue
    # A delimiter is only recognised when it is the entire line, including
    # the trailing newline. Raises on an unknown @state.
    def lex_text
      @text.each_line.with_index(1) do |string, lineno|
        case @state
        when Initial
          # Skip until "%{"
          if string == "%{\n"
            @state = Prologue
            # Empty placeholder entry recorded for the delimiter line itself.
            @prologue << ["", lineno]
            next
          end
        when Prologue
          # Between "%{" and "%}"
          if string == "%}\n"
            @state = BisonDeclarations
            @prologue << ["", lineno]
            next
          end
          @prologue << [string, lineno]
        when BisonDeclarations
          if string == "%%\n"
            @state = GrammarRules
            next
          end
          @bison_declarations << [string, lineno]
        when GrammarRules
          # Between "%%" and "%%"
          if string == "%%\n"
            @state = Epilogue
            next
          end
          @grammar_rules << [string, lineno]
        when Epilogue
          @epilogue << [string, lineno]
        else
          raise "Unknown state: #{@state}"
        end
      end
    end
    # See:
    #   * https://www.gnu.org/software/bison/manual/html_node/Decl-Summary.html
    #   * https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
    #   * https://www.gnu.org/software/bison/manual/html_node/Empty-Rules.html
    def lex_common(lines, tokens)
      line = lines.first[1]
      column = 0
      ss = StringScanner.new(lines.map(&:first).join)
      while !ss.eos? do
        case
-        when ss.scan(/\n/)
+        when @scanner.scan(/\n/)
-          line += 1
+          @line += 1
-          column = ss.pos
+          @head = @scanner.pos + 1
-        when ss.scan(/\s+/)
+        when @scanner.scan(/\*\//)
-          # skip
+          return
        when ss.scan(/;/)
          tokens << create_token(Token::Semicolon, ss[0], line, ss.pos - column)
        when ss.scan(/\|/)
          tokens << create_token(Token::Bar, ss[0], line, ss.pos - column)
        when ss.scan(/(\d+)/)
          tokens << create_token(Token::Number, Integer(ss[0]), line, ss.pos - column)
        when ss.scan(/(<[a-zA-Z0-9_]+>)/)
          tokens << create_token(Token::Tag, ss[0], line, ss.pos - column)
        when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]\s*:/)
          tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
          tokens << create_token(Token::Named_Ref, ss[2], line, ss.pos - column)
        when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\s*:/)
          tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
        when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
          tokens << create_token(Token::Ident, ss[0], line, ss.pos - column)
        when ss.scan(/\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/)
          tokens << create_token(Token::Named_Ref, ss[1], line, ss.pos - column)
        when ss.scan(/%expect/)
          tokens << create_token(Token::P_expect, ss[0], line, ss.pos - column)
        when ss.scan(/%define/)
          tokens << create_token(Token::P_define, ss[0], line, ss.pos - column)
        when ss.scan(/%printer/)
          tokens << create_token(Token::P_printer, ss[0], line, ss.pos - column)
        when ss.scan(/%error-token/)
          tokens << create_token(Token::P_error_token, ss[0], line, ss.pos - column)
        when ss.scan(/%lex-param/)
          tokens << create_token(Token::P_lex_param, ss[0], line, ss.pos - column)
        when ss.scan(/%parse-param/)
          tokens << create_token(Token::P_parse_param, ss[0], line, ss.pos - column)
        when ss.scan(/%initial-action/)
          tokens << create_token(Token::P_initial_action, ss[0], line, ss.pos - column)
        when ss.scan(/%union/)
          tokens << create_token(Token::P_union, ss[0], line, ss.pos - column)
        when ss.scan(/%token/)
          tokens << create_token(Token::P_token, ss[0], line, ss.pos - column)
        when ss.scan(/%type/)
          tokens << create_token(Token::P_type, ss[0], line, ss.pos - column)
        when ss.scan(/%nonassoc/)
          tokens << create_token(Token::P_nonassoc, ss[0], line, ss.pos - column)
        when ss.scan(/%left/)
          tokens << create_token(Token::P_left, ss[0], line, ss.pos - column)
        when ss.scan(/%right/)
          tokens << create_token(Token::P_right, ss[0], line, ss.pos - column)
        when ss.scan(/%precedence/)
          tokens << create_token(Token::P_precedence, ss[0], line, ss.pos - column)
        when ss.scan(/%prec/)
          tokens << create_token(Token::P_prec, ss[0], line, ss.pos - column)
        when ss.scan(/{/)
          token, line = lex_user_code(ss, line, ss.pos - column, lines)
          tokens << token
        when ss.scan(/"/)
          string, line = lex_string(ss, "\"", line, lines)
          token = create_token(Token::String, string, line, ss.pos - column)
          tokens << token
        when ss.scan(/\/\*/)
          # TODO: Need to keep comment?
          line = lex_comment(ss, line, lines, "")
        when ss.scan(/\/\//)
          line = lex_line_comment(ss, line, "")
        when ss.scan(/'(.)'/)
          tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
        when ss.scan(/'\\(.)'/) # '\\', '\t'
          tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
        when ss.scan(/'\\(\d+)'/) # '\13'
          tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
        when ss.scan(/%empty/)
          # skip
        else
-          l = line - lines.first[1]
+          @scanner.getch
          split = ss.string.split("\n")
          col = ss.pos - split[0...l].join("\n").length
          raise "Parse error (unknown token): #{split[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{col})"
        end
      end
    end
-    def lex_bison_declarations_tokens
+    def build_token(type:, s_value:, **options)
-      lex_common(@bison_declarations, @bison_declarations_tokens)
+      token = Token.new(type: type, s_value: s_value)
-    end
+      token.line = @head_line
-
+      token.column = @head_column
-    def lex_user_code(ss, line, column, lines)
+      options.each do |attr, value|
-      first_line = line
+        token.public_send("#{attr}=", value)
      first_column = column
      debug("Enter lex_user_code: #{line}")
      brace_count = 1
      str = "{"
      # Array of [type, $n, tag, first column, last column]
      # TODO: Is it better to keep string, like "$$", and use gsub?
      references = []
      while !ss.eos? do
        case
        when ss.scan(/\n/)
          line += 1
        when ss.scan(/"/)
          string, line = lex_string(ss, "\"", line, lines)
          str << string
          next
        when ss.scan(/'/)
          string, line = lex_string(ss, "'", line, lines)
          str << string
          next
        # $ references
        # It need to wrap an identifier with brackets to use ".-" for identifiers
        when ss.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
          tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
          references << [:dollar, "$", tag, str.length, str.length + ss[0].length - 1]
        when ss.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
          tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
          references << [:dollar, Integer(ss[2]), tag, str.length, str.length + ss[0].length - 1]
        when ss.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
          tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
          references << [:dollar, ss[2], tag, str.length, str.length + ss[0].length - 1]
        when ss.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
          tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
          references << [:dollar, ss[2], tag, str.length, str.length + ss[0].length - 1]
        # @ references
        # It need to wrap an identifier with brackets to use ".-" for identifiers
        when ss.scan(/@\$/) # @$
          references << [:at, "$", nil, str.length, str.length + ss[0].length - 1]
        when ss.scan(/@(\d+)/) # @1
          references << [:at, Integer(ss[1]), nil, str.length, str.length + ss[0].length - 1]
        when ss.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
          references << [:at, ss[1], nil, str.length, str.length + ss[0].length - 1]
        when ss.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right  (named reference with brackets)
          references << [:at, ss[1], nil, str.length, str.length + ss[0].length - 1]
        when ss.scan(/{/)
          brace_count += 1
        when ss.scan(/}/)
          brace_count -= 1
          debug("Return lex_user_code: #{line}")
          if brace_count == 0
            str << ss[0]
            user_code = Token.new(type: Token::User_code, s_value: str.freeze)
            user_code.line = first_line
            user_code.column = first_column
            user_code.references = references
            return [user_code, line]
          end
        when ss.scan(/\/\*/)
          str << ss[0]
          line = lex_comment(ss, line, lines, str)
        when ss.scan(/\/\//)
          str << ss[0]
          line = lex_line_comment(ss, line, str)
        else
          # noop, just consume char
          str << ss.getch
          next
        end
        str << ss[0]
      end
-      # Reach to end of input but brace does not match
+      token
      l = line - lines.first[1]
      raise "Parse error (brace mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
    end
-    def lex_string(ss, terminator, line, lines)
+    def newline
-      debug("Enter lex_string: #{line}")
+      @line += 1
-
+      @head = @scanner.pos + 1
      str = terminator.dup
      while (c = ss.getch) do
        str << c
        case c
        when "\n"
          line += 1
        when terminator
          debug("Return lex_string: #{line}")
          return [str, line]
        else
          # noop
        end
      end
      # Reach to end of input but quote does not match
      l = line - lines.first[1]
      raise "Parse error (quote mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
    end
    # Consumes a /* ... */ comment whose opening "/*" has already been read.
    # Every character of the comment body is appended to str; the closing
    # "*/" is consumed but not appended. Returns the line number after the
    # comment (line plus the number of newlines seen). Raises when the input
    # ends before "*/" is found.
    def lex_comment(ss, line, lines, str)
      until ss.eos?
        return line if ss.scan(/\*\//)

        if ss.scan(/\n/)
          line += 1
          str << ss[0]
        else
          str << ss.getch
        end
      end

      # Input exhausted without finding the end of the comment
      offset = line - lines.first[1]
      raise "Parse error (comment mismatch): #{ss.string.split("\n")[offset]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
    end
    # Consumes a // comment whose leading "//" has already been read.
    # Characters up to (but excluding) the line break are appended to str.
    # Returns line + 1 once the line break is consumed, or line unchanged
    # when the input ends first.
    def lex_line_comment(ss, line, str)
      until ss.eos?
        return line + 1 if ss.scan(/\n/)

        str << ss.getch
      end

      line
    end
    # Runs lex_common over the collected @grammar_rules lines, passing
    # @grammar_rules_tokens as the token list.
    def lex_grammar_rules_tokens
      lex_common(@grammar_rules, @grammar_rules_tokens)
    end
    # Prints msg to standard output, but only while @debug is truthy.
    def debug(msg)
      puts "#{msg}\n" if @debug
    end
  end
 end
--- a/tool/lrama/lib/lrama/option_parser.rb
+++ b/tool/lrama/lib/lrama/option_parser.rb
@ -52,12 +52,16 @@ module Lrama
          Usage: lrama [options] FILE
        BANNER
        o.separator ''
        o.separator 'STDIN mode:'
        o.separator 'lrama [options] - FILE               read grammar from STDIN'
        o.separator ''
        o.separator 'Tuning the Parser:'
        o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v }
        o.on('-t', 'reserved, do nothing') { }
        o.separator ''
        o.separator 'Output:'
-        o.on('-h', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v }
+        o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v }
        o.on('-h=[FILE]', 'also produce a header file named FILE (deprecated)') {|v| @options.header = true; @options.header_file = v }
        o.on('-d', 'also produce a header file') { @options.header = true }
        o.on('-r', '--report=THINGS', Array, 'also produce details on the automaton') {|v| @report = v }
        o.on('--report-file=FILE', 'also produce details on the automaton output to a file named FILE') {|v| @options.report_file = v }
--- a/tool/lrama/lib/lrama/output.rb
+++ b/tool/lrama/lib/lrama/output.rb
@ -7,7 +7,7 @@ module Lrama
    extend Forwardable
    include Report::Duration
-    attr_reader :grammar_file_path, :context, :grammar, :error_recovery
+    attr_reader :grammar_file_path, :context, :grammar, :error_recovery, :include_header
    def_delegators "@context", :yyfinal, :yylast, :yyntokens, :yynnts, :yynrules, :yynstates,
                               :yymaxutok, :yypact_ninf, :yytable_ninf
@ -28,6 +28,7 @@ module Lrama
      @context = context
      @grammar = grammar
      @error_recovery = error_recovery
      @include_header = header_file_path ? header_file_path.sub("./", "") : nil
    end
    if ERB.instance_method(:initialize).parameters.last.first == :key
@ -40,11 +41,8 @@ module Lrama
      end
    end
-    def eval_template(file, path)
+    def render_partial(file)
-      erb = self.class.erb(File.read(file))
+      render_template(partial_file(file))
      erb.filename = file
      tmp = erb.result_with_hash(context: @context, output: self)
      replace_special_variables(tmp, path)
    end
    def render
@ -143,7 +141,7 @@ module Lrama
        str << <<-STR
    case #{sym.enum_name}: /* #{sym.comment}  */
 #line #{sym.printer.lineno} "#{@grammar_file_path}"
-         #{sym.printer.translated_code(sym.tag)}
+         {#{sym.printer.translated_code(sym.tag)}}
 #line [@oline@] [@ofile@]
        break;
@ -160,7 +158,7 @@ module Lrama
      <<-STR
        #{comment}
 #line #{@grammar.initial_action.line} "#{@grammar_file_path}"
-        #{@grammar.initial_action.translated_code}
+        {#{@grammar.initial_action.translated_code}}
      STR
    end
@ -173,7 +171,7 @@ module Lrama
        str << <<-STR
    case #{sym.enum_name}: /* #{sym.comment}  */
 #line #{sym.error_token.lineno} "#{@grammar_file_path}"
-         #{sym.error_token.translated_code(sym.tag)}
+         {#{sym.error_token.translated_code(sym.tag)}}
 #line [@oline@] [@ofile@]
        break;
@ -190,14 +188,13 @@ module Lrama
      @context.states.rules.each do |rule|
        next unless rule.code
        rule = rule
        code = rule.code
        spaces = " " * (code.column - 1)
        str << <<-STR
  case #{rule.id + 1}: /* #{rule.as_comment}  */
 #line #{code.line} "#{@grammar_file_path}"
-#{spaces}#{rule.translated_code}
+#{spaces}{#{rule.translated_code}}
 #line [@oline@] [@ofile@]
    break;
@ -212,14 +209,14 @@ module Lrama
      str
    end
-    def omit_braces_and_blanks(param)
+    def omit_blanks(param)
-      param[1..-2].strip
+      param.strip
    end
    # b4_parse_param
    def parse_param
      if @grammar.parse_param
-        omit_braces_and_blanks(@grammar.parse_param)
+        omit_blanks(@grammar.parse_param)
      else
        ""
      end
@ -227,7 +224,7 @@ module Lrama
    def lex_param
      if @grammar.lex_param
-        omit_braces_and_blanks(@grammar.lex_param)
+        omit_blanks(@grammar.lex_param)
      else
        ""
      end
@ -354,6 +351,17 @@ module Lrama
    private
    # Renders the ERB template at +file+ and passes the rendered text,
    # together with +path+, through replace_special_variables.
    # Returns whatever replace_special_variables returns.
    def eval_template(file, path)
      replace_special_variables(render_template(file), path)
    end
    # Reads +file+, builds an ERB object from its contents through the
    # class-level erb helper, records +file+ as the ERB filename, and returns
    # the evaluated result. The template is evaluated with two locals:
    # +context+ (the value of @context) and +output+ (this object).
    def render_template(file)
      self.class.erb(File.read(file))
          .tap { |template| template.filename = file }
          .result_with_hash(context: @context, output: self)
    end
    # Path of the template named by @template_name, joined onto template_dir.
    def template_file
      base_dir = template_dir
      File.join(base_dir, @template_name)
    end
@ -362,6 +370,10 @@ module Lrama
      File.join(template_dir, "bison/yacc.h")
    end
    # Resolves +file+ to a path under template_dir by joining the two.
    def partial_file(file)
      base_dir = template_dir
      File.join(base_dir, file)
    end
    # Directory containing the templates: "../../../template" expanded
    # relative to this source file's path.
    def template_dir
      relative_path = "../../../template"
      File.expand_path(relative_path, __FILE__)
    end
--- a/tool/lrama/lib/lrama/parser.rb
+++ b/tool/lrama/lib/lrama/parser.rb
--- a/tool/lrama/lib/lrama/parser/token_scanner.rb
+++ b/tool/lrama/lib/lrama/parser/token_scanner.rb
@ -1,56 +0,0 @@
 module Lrama
  class Parser
    # Cursor over an array of tokens. Each token is expected to respond to
    # +type+; the scanner itself never inspects anything else on a token.
    class TokenScanner
      # tokens - Array of token objects to walk through, starting at index 0.
      def initialize(tokens)
        @tokens = tokens
        @index = 0
      end
      # Token under the cursor, or nil once the cursor is past the last token.
      def current_token
        @tokens[@index]
      end
      # +type+ of the current token, or nil when there is no current token.
      def current_type
        current_token&.type
      end
      # Token just before the cursor, or nil when nothing has been consumed.
      def previous_token
        # Without this guard, index 0 would read @tokens[-1] and wrongly
        # return the LAST token of the stream.
        return nil if @index.zero?
        @tokens[@index - 1]
      end
      # Returns the current token and advances the cursor by one.
      def next
        token = current_token
        @index += 1
        token
      end
      # Consumes and returns the current token when its type is one of
      # +token_types+; otherwise leaves the cursor untouched and returns nil.
      def consume(*token_types)
        return self.next if token_types.include?(current_type)
        nil
      end
      # Same as #consume, but raises RuntimeError when the current token's
      # type is not one of +token_types+.
      def consume!(*token_types)
        consume(*token_types) || (raise "#{token_types} is expected but #{current_type}. #{current_token}")
      end
      # Consumes consecutive tokens whose type is one of +token_types+ and
      # returns them as an Array. Raises RuntimeError when none matched.
      def consume_multi(*token_types)
        a = []
        while token_types.include?(current_type)
          a << self.next
        end
        raise "No token is consumed. #{token_types}" if a.empty?
        a
      end
      # True when the cursor is past the end of the token stream.
      def eots?
        current_token.nil?
      end
    end
  end
 end
--- a/tool/lrama/lib/lrama/version.rb
+++ b/tool/lrama/lib/lrama/version.rb
@ -1,3 +1,3 @@
 module Lrama
-  VERSION = "0.5.6".freeze
+  VERSION = "0.5.7".freeze
 end
--- a/tool/lrama/template/bison/_yacc.h
+++ b/tool/lrama/template/bison/_yacc.h
@ -0,0 +1,71 @@
 <%# b4_shared_declarations -%>
  <%-# b4_cpp_guard_open([b4_spec_mapped_header_file]) -%>
    <%- if output.spec_mapped_header_file -%>
 #ifndef <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
 # define <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
    <%- end -%>
  <%-# b4_declare_yydebug & b4_YYDEBUG_define -%>
 /* Debug traces.  */
 #ifndef YYDEBUG
 # define YYDEBUG 0
 #endif
 #if YYDEBUG && !defined(yydebug)
 extern int yydebug;
 #endif
  <%-# b4_percent_code_get([[requires]]). %code is not supported -%>
  <%-# b4_token_enums_defines -%>
 /* Token kinds.  */
 #ifndef YYTOKENTYPE
 # define YYTOKENTYPE
  enum yytokentype
  {
 <%= output.token_enums -%>
  };
  typedef enum yytokentype yytoken_kind_t;
 #endif
  <%-# b4_declare_yylstype -%>
    <%-# b4_value_type_define -%>
 /* Value type.  */
 #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
 union YYSTYPE
 {
 #line <%= output.grammar.union.lineno %> "<%= output.grammar_file_path %>"
 <%= output.grammar.union.braces_less_code %>
 #line [@oline@] [@ofile@]
 };
 typedef union YYSTYPE YYSTYPE;
 # define YYSTYPE_IS_TRIVIAL 1
 # define YYSTYPE_IS_DECLARED 1
 #endif
    <%-# b4_location_type_define -%>
 /* Location type.  */
 #if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED
 typedef struct YYLTYPE YYLTYPE;
 struct YYLTYPE
 {
  int first_line;
  int first_column;
  int last_line;
  int last_column;
 };
 # define YYLTYPE_IS_DECLARED 1
 # define YYLTYPE_IS_TRIVIAL 1
 #endif
  <%-# b4_declare_yyerror_and_yylex. Not supported -%>
  <%-# b4_declare_yyparse -%>
 int yyparse (<%= output.parse_param %>);
  <%-# b4_percent_code_get([[provides]]). %code is not supported -%>
  <%-# b4_cpp_guard_close([b4_spec_mapped_header_file]) -%>
    <%- if output.spec_mapped_header_file -%>
 #endif /* !<%= output.b4_cpp_guard__b4_spec_mapped_header_file %>  */
    <%- end -%>
--- a/tool/lrama/template/bison/yacc.c
+++ b/tool/lrama/template/bison/yacc.c
@ -101,79 +101,13 @@
 # endif
 <%# b4_header_include_if -%>
 <%- if output.include_header -%>
 #include "<%= output.include_header %>"
 <%- else -%>
 /* Use api.header.include to #include this header
   instead of duplicating it here.  */
-<%# b4_shared_declarations -%>
+<%= output.render_partial("bison/_yacc.h") %>
-  <%-# b4_cpp_guard_open([b4_spec_mapped_header_file]) -%>
+<%- end -%>
    <%- if output.spec_mapped_header_file -%>
 #ifndef <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
 # define <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
    <%- end -%>
  <%-# b4_declare_yydebug & b4_YYDEBUG_define -%>
 /* Debug traces.  */
 #ifndef YYDEBUG
 # define YYDEBUG 0
 #endif
 #if YYDEBUG && !defined(yydebug)
 extern int yydebug;
 #endif
  <%-# b4_percent_code_get([[requires]]). %code is not supported -%>
  <%-# b4_token_enums_defines -%>
 /* Token kinds.  */
 #ifndef YYTOKENTYPE
 # define YYTOKENTYPE
  enum yytokentype
  {
 <%= output.token_enums -%>
  };
  typedef enum yytokentype yytoken_kind_t;
 #endif
  <%-# b4_declare_yylstype -%>
    <%-# b4_value_type_define -%>
 /* Value type.  */
 #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
 union YYSTYPE
 {
 #line <%= output.grammar.union.lineno %> "<%= output.grammar_file_path %>"
 <%= output.grammar.union.braces_less_code %>
 #line [@oline@] [@ofile@]
 };
 typedef union YYSTYPE YYSTYPE;
 # define YYSTYPE_IS_TRIVIAL 1
 # define YYSTYPE_IS_DECLARED 1
 #endif
    <%-# b4_location_type_define -%>
 /* Location type.  */
 #if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED
 typedef struct YYLTYPE YYLTYPE;
 struct YYLTYPE
 {
  int first_line;
  int first_column;
  int last_line;
  int last_column;
 };
 # define YYLTYPE_IS_DECLARED 1
 # define YYLTYPE_IS_TRIVIAL 1
 #endif
  <%-# b4_declare_yyerror_and_yylex. Not supported -%>
  <%-# b4_declare_yyparse -%>
 int yyparse (<%= output.parse_param %>);
  <%-# b4_percent_code_get([[provides]]). %code is not supported -%>
  <%-# b4_cpp_guard_close([b4_spec_mapped_header_file]) -%>
    <%- if output.spec_mapped_header_file -%>
 #endif /* !<%= output.b4_cpp_guard__b4_spec_mapped_header_file %>  */
    <%- end -%>
 <%# b4_declare_symbol_enum -%>
 /* Symbol kind.  */
 enum yysymbol_kind_t
@ -2114,3 +2048,4 @@ yyreturnlab:
 #line <%= output.aux.epilogue_first_lineno - 1 %> "<%= output.grammar_file_path %>"
 <%= output.aux.epilogue -%>
--- a/tool/lrama/template/bison/yacc.h
+++ b/tool/lrama/template/bison/yacc.h
@ -37,76 +37,4 @@
 /* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual,
   especially those whose name start with YY_ or yy_.  They are
   private implementation details that can be changed or removed.  */
-
+<%= output.render_partial("bison/_yacc.h") %>
 <%# b4_shared_declarations -%>
 <%# b4_shared_declarations -%>
  <%-# b4_cpp_guard_open([b4_spec_mapped_header_file]) -%>
    <%- if output.spec_mapped_header_file -%>
 #ifndef <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
 # define <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
    <%- end -%>
  <%-# b4_declare_yydebug & b4_YYDEBUG_define -%>
 /* Debug traces.  */
 #ifndef YYDEBUG
 # define YYDEBUG 0
 #endif
 #if YYDEBUG
 extern int yydebug;
 #endif
  <%-# b4_percent_code_get([[requires]]). %code is not supported -%>
  <%-# b4_token_enums_defines -%>
 /* Token kinds.  */
 #ifndef YYTOKENTYPE
 # define YYTOKENTYPE
  enum yytokentype
  {
 <%= output.token_enums -%>
  };
  typedef enum yytokentype yytoken_kind_t;
 #endif
  <%-# b4_declare_yylstype -%>
    <%-# b4_value_type_define -%>
 /* Value type.  */
 #if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
 union YYSTYPE
 {
 #line <%= output.grammar.union.lineno %> "<%= output.grammar_file_path %>"
 <%= output.grammar.union.braces_less_code %>
 #line [@oline@] [@ofile@]
 };
 typedef union YYSTYPE YYSTYPE;
 # define YYSTYPE_IS_TRIVIAL 1
 # define YYSTYPE_IS_DECLARED 1
 #endif
    <%-# b4_location_type_define -%>
 /* Location type.  */
 #if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED
 typedef struct YYLTYPE YYLTYPE;
 struct YYLTYPE
 {
  int first_line;
  int first_column;
  int last_line;
  int last_column;
 };
 # define YYLTYPE_IS_DECLARED 1
 # define YYLTYPE_IS_TRIVIAL 1
 #endif
  <%-# b4_declare_yyerror_and_yylex. Not supported -%>
  <%-# b4_declare_yyparse -%>
 int yyparse (<%= output.parse_param %>);
  <%-# b4_percent_code_get([[provides]]). %code is not supported -%>
  <%-# b4_cpp_guard_close([b4_spec_mapped_header_file]) -%>
    <%- if output.spec_mapped_header_file -%>
 #endif /* !<%= output.b4_cpp_guard__b4_spec_mapped_header_file %>  */
    <%- end -%>