Lrama v0.5.7

This commit is contained in:
yui-knk 2023-10-23 09:07:23 +09:00 committed by Yuichiro Kaneko
parent dd9b4851a3
commit 2d468358a5
14 changed files with 2224 additions and 812 deletions

View File

@ -6,8 +6,9 @@ module Lrama
Report::Duration.enable if options.trace_opts[:time]
warning = Lrama::Warning.new
grammar = Lrama::Parser.new(options.y.read).parse
text = options.y.read
options.y.close if options.y != STDIN
grammar = Lrama::Parser.new(text).parse
states = Lrama::States.new(grammar, warning, trace_state: (options.trace_opts[:automaton] || options.trace_opts[:closure]))
states.compute
context = Lrama::Context.new(states)

View File

@ -170,7 +170,7 @@ module Lrama
return a
end
# Mapping from rule number to lenght of RHS.
# Mapping from rule number to length of RHS.
# Dummy rule is appended as the first element whose value is 0
# because 0 means error in yydefact.
def yyr2
@ -214,7 +214,7 @@ module Lrama
(rule_id + 1) * -1
end
# Symbol number is assinged to term first then nterm.
# Symbol number is assigned to term first then nterm.
# This method calculates sequence_number for nterm.
def nterm_number_to_sequence_number(nterm_number)
nterm_number - @states.terms.count
@ -259,7 +259,7 @@ module Lrama
actions[conflict.symbol.number] = ErrorActionNumber
end
# If default_reduction_rule, replase default_reduction_rule in
# If default_reduction_rule, replace default_reduction_rule in
# actions with zero.
if state.default_reduction_rule
actions.map! do |e|
@ -272,7 +272,7 @@ module Lrama
end
# If no default_reduction_rule, default behavior is an
# error then replase ErrorActionNumber with zero.
# error then replace ErrorActionNumber with zero.
if !state.default_reduction_rule
actions.map! do |e|
if e == ErrorActionNumber

View File

@ -40,8 +40,7 @@ module Lrama
end
if @h[x] == d
while true do
z = @stack.pop
while (z = @stack.pop) do
@h[z] = Float::INFINITY
break if z == x
@result[z] = @result[x] # F (Top of S) = F x

View File

@ -1,3 +1,5 @@
require "strscan"
require "lrama/grammar/auxiliary"
require "lrama/grammar/code"
require "lrama/grammar/error_token"
@ -306,6 +308,188 @@ module Lrama
@nterms ||= @symbols.select(&:nterm?)
end
def extract_references
unless initial_action.nil?
scanner = StringScanner.new(initial_action.s_value)
references = []
while !scanner.eos? do
start = scanner.pos
case
# $ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, "$", tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
# @ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when scanner.scan(/@\$/) # @$
references << [:at, "$", nil, start, scanner.pos - 1]
when scanner.scan(/@(\d+)/) # @1
references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
references << [:at, scanner[1], nil, start, scanner.pos - 1]
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
references << [:at, scanner[1], nil, start, scanner.pos - 1]
else
scanner.getch
end
end
initial_action.token_code.references = references
build_references(initial_action.token_code)
end
@printers.each do |printer|
scanner = StringScanner.new(printer.code.s_value)
references = []
while !scanner.eos? do
start = scanner.pos
case
# $ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, "$", tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
# @ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when scanner.scan(/@\$/) # @$
references << [:at, "$", nil, start, scanner.pos - 1]
when scanner.scan(/@(\d+)/) # @1
references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
references << [:at, scanner[1], nil, start, scanner.pos - 1]
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
references << [:at, scanner[1], nil, start, scanner.pos - 1]
else
scanner.getch
end
end
printer.code.token_code.references = references
build_references(printer.code.token_code)
end
@error_tokens.each do |error_token|
scanner = StringScanner.new(error_token.code.s_value)
references = []
while !scanner.eos? do
start = scanner.pos
case
# $ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, "$", tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
# @ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when scanner.scan(/@\$/) # @$
references << [:at, "$", nil, start, scanner.pos - 1]
when scanner.scan(/@(\d+)/) # @1
references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
references << [:at, scanner[1], nil, start, scanner.pos - 1]
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
references << [:at, scanner[1], nil, start, scanner.pos - 1]
else
scanner.getch
end
end
error_token.code.token_code.references = references
build_references(error_token.code.token_code)
end
@_rules.each do |lhs, rhs, _|
rhs.each_with_index do |token, index|
next if token.class == Lrama::Grammar::Symbol || token.type != Lrama::Lexer::Token::User_code
scanner = StringScanner.new(token.s_value)
references = []
while !scanner.eos? do
start = scanner.pos
case
# $ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, "$", tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
# @ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when scanner.scan(/@\$/) # @$
references << [:at, "$", nil, start, scanner.pos - 1]
when scanner.scan(/@(\d+)/) # @1
references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
references << [:at, scanner[1], nil, start, scanner.pos - 1]
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
references << [:at, scanner[1], nil, start, scanner.pos - 1]
when scanner.scan(/\/\*/)
scanner.scan_until(/\*\//)
else
scanner.getch
end
end
token.references = references
token.numberize_references(lhs, rhs)
build_references(token)
end
end
end
def create_token(type, s_value, line, column)
t = Token.new(type: type, s_value: s_value)
t.line = line
t.column = column
return t
end
private
def find_nterm_by_id!(id)
@ -470,7 +654,9 @@ module Lrama
# Fill #number and #token_id
def fill_symbol_number
# TODO: why start from 256
# Character literal in grammar file has
# token id corresponding to ASCII code by default,
# so start token_id from 256.
token_id = 256
# YYEMPTY = -2

View File

@ -2,8 +2,8 @@ module Lrama
class Grammar
class Union < Struct.new(:code, :lineno, keyword_init: true)
def braces_less_code
# Remove braces
code.s_value[1..-2]
# Braces are already removed by the lexer
code.s_value
end
end
end

View File

@ -1,346 +1,174 @@
require "strscan"
require "lrama/report/duration"
require "lrama/lexer/token"
module Lrama
# Lexer for parse.y
class Lexer
include Lrama::Report::Duration
attr_accessor :status
attr_accessor :end_symbol
# States
#
# See: https://www.gnu.org/software/bison/manual/html_node/Grammar-Outline.html
Initial = 0
Prologue = 1
BisonDeclarations = 2
GrammarRules = 3
Epilogue = 4
# Token types
attr_reader :prologue, :bison_declarations, :grammar_rules, :epilogue,
:bison_declarations_tokens, :grammar_rules_tokens
SYMBOLS = %w(%{ %} %% { } \[ \] : \| ;)
PERCENT_TOKENS = %w(
%union
%token
%type
%left
%right
%nonassoc
%expect
%define
%require
%printer
%lex-param
%parse-param
%initial-action
%precedence
%prec
%error-token
)
def initialize(text)
@text = text
@state = Initial
# Array of texts
@prologue = []
@bison_declarations = []
@grammar_rules = []
@epilogue = []
@scanner = StringScanner.new(text)
@head = @scanner.pos
@line = 1
@status = :initial
@end_symbol = nil
end
@bison_declarations_tokens = []
@grammar_rules_tokens = []
@debug = false
report_duration(:lex) do
lex_text
lex_bison_declarations_tokens
lex_grammar_rules_tokens
def next_token
case @status
when :initial
lex_token
when :c_declaration
lex_c_code
end
end
def line
@line
end
def column
@scanner.pos - @head
end
def lex_token
while !@scanner.eos? do
case
when @scanner.scan(/\n/)
newline
when @scanner.scan(/\s+/)
# noop
when @scanner.scan(/\/\*/)
lex_comment
when @scanner.scan(/\/\//)
@scanner.scan_until(/\n/)
newline
when @scanner.scan(/%empty/)
# noop
else
break
end
end
@head_line = line
@head_column = column
case
when @scanner.eos?
return
when @scanner.scan(/#{SYMBOLS.join('|')}/)
return [@scanner.matched, @scanner.matched]
when @scanner.scan(/#{PERCENT_TOKENS.join('|')}/)
return [@scanner.matched, @scanner.matched]
when @scanner.scan(/<\w+>/)
return [:TAG, build_token(type: Token::Tag, s_value: @scanner.matched)]
when @scanner.scan(/'.'/)
return [:CHARACTER, build_token(type: Token::Char, s_value: @scanner.matched)]
when @scanner.scan(/'\\\\'|'\\b'|'\\t'|'\\f'|'\\r'|'\\n'|'\\v'|'\\13'/)
return [:CHARACTER, build_token(type: Token::Char, s_value: @scanner.matched)]
when @scanner.scan(/"/)
return [:STRING, %Q("#{@scanner.scan_until(/"/)})]
when @scanner.scan(/\d+/)
return [:INTEGER, Integer(@scanner.matched)]
when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
token = build_token(type: Token::Ident, s_value: @scanner.matched)
type =
if @scanner.check(/\s*(\[\s*[a-zA-Z_.][-a-zA-Z0-9_.]*\s*\])?\s*:/)
:IDENT_COLON
else
:IDENTIFIER
end
return [type, token]
else
raise
end
end
def lex_c_code
nested = 0
code = ''
while !@scanner.eos? do
case
when @scanner.scan(/{/)
code += @scanner.matched
nested += 1
when @scanner.scan(/}/)
if nested == 0 && @end_symbol == '}'
@scanner.unscan
return [:C_DECLARATION, build_token(type: Token::User_code, s_value: code, references: [])]
else
code += @scanner.matched
nested -= 1
end
when @scanner.check(/#{@end_symbol}/)
return [:C_DECLARATION, build_token(type: Token::User_code, s_value: code, references: [])]
when @scanner.scan(/\n/)
code += @scanner.matched
newline
when @scanner.scan(/"/)
matched = @scanner.scan_until(/"/)
code += %Q("#{matched})
@line += matched.count("\n")
when @scanner.scan(/'/)
matched = @scanner.scan_until(/'/)
code += %Q('#{matched})
else
code += @scanner.getch
end
end
raise
end
private
def create_token(type, s_value, line, column)
t = Token.new(type: type, s_value: s_value)
t.line = line
t.column = column
return t
end
# TODO: Remove this
def lex_text
@text.each_line.with_index(1) do |string, lineno|
case @state
when Initial
# Skip until "%{"
if string == "%{\n"
@state = Prologue
@prologue << ["", lineno]
next
end
when Prologue
# Between "%{" and "%}"
if string == "%}\n"
@state = BisonDeclarations
@prologue << ["", lineno]
next
end
@prologue << [string, lineno]
when BisonDeclarations
if string == "%%\n"
@state = GrammarRules
next
end
@bison_declarations << [string, lineno]
when GrammarRules
# Between "%%" and "%%"
if string == "%%\n"
@state = Epilogue
next
end
@grammar_rules << [string, lineno]
when Epilogue
@epilogue << [string, lineno]
else
raise "Unknown state: #{@state}"
end
end
end
# See:
# * https://www.gnu.org/software/bison/manual/html_node/Decl-Summary.html
# * https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
# * https://www.gnu.org/software/bison/manual/html_node/Empty-Rules.html
def lex_common(lines, tokens)
line = lines.first[1]
column = 0
ss = StringScanner.new(lines.map(&:first).join)
while !ss.eos? do
def lex_comment
while !@scanner.eos? do
case
when ss.scan(/\n/)
line += 1
column = ss.pos
when ss.scan(/\s+/)
# skip
when ss.scan(/;/)
tokens << create_token(Token::Semicolon, ss[0], line, ss.pos - column)
when ss.scan(/\|/)
tokens << create_token(Token::Bar, ss[0], line, ss.pos - column)
when ss.scan(/(\d+)/)
tokens << create_token(Token::Number, Integer(ss[0]), line, ss.pos - column)
when ss.scan(/(<[a-zA-Z0-9_]+>)/)
tokens << create_token(Token::Tag, ss[0], line, ss.pos - column)
when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]\s*:/)
tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
tokens << create_token(Token::Named_Ref, ss[2], line, ss.pos - column)
when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\s*:/)
tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
tokens << create_token(Token::Ident, ss[0], line, ss.pos - column)
when ss.scan(/\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/)
tokens << create_token(Token::Named_Ref, ss[1], line, ss.pos - column)
when ss.scan(/%expect/)
tokens << create_token(Token::P_expect, ss[0], line, ss.pos - column)
when ss.scan(/%define/)
tokens << create_token(Token::P_define, ss[0], line, ss.pos - column)
when ss.scan(/%printer/)
tokens << create_token(Token::P_printer, ss[0], line, ss.pos - column)
when ss.scan(/%error-token/)
tokens << create_token(Token::P_error_token, ss[0], line, ss.pos - column)
when ss.scan(/%lex-param/)
tokens << create_token(Token::P_lex_param, ss[0], line, ss.pos - column)
when ss.scan(/%parse-param/)
tokens << create_token(Token::P_parse_param, ss[0], line, ss.pos - column)
when ss.scan(/%initial-action/)
tokens << create_token(Token::P_initial_action, ss[0], line, ss.pos - column)
when ss.scan(/%union/)
tokens << create_token(Token::P_union, ss[0], line, ss.pos - column)
when ss.scan(/%token/)
tokens << create_token(Token::P_token, ss[0], line, ss.pos - column)
when ss.scan(/%type/)
tokens << create_token(Token::P_type, ss[0], line, ss.pos - column)
when ss.scan(/%nonassoc/)
tokens << create_token(Token::P_nonassoc, ss[0], line, ss.pos - column)
when ss.scan(/%left/)
tokens << create_token(Token::P_left, ss[0], line, ss.pos - column)
when ss.scan(/%right/)
tokens << create_token(Token::P_right, ss[0], line, ss.pos - column)
when ss.scan(/%precedence/)
tokens << create_token(Token::P_precedence, ss[0], line, ss.pos - column)
when ss.scan(/%prec/)
tokens << create_token(Token::P_prec, ss[0], line, ss.pos - column)
when ss.scan(/{/)
token, line = lex_user_code(ss, line, ss.pos - column, lines)
tokens << token
when ss.scan(/"/)
string, line = lex_string(ss, "\"", line, lines)
token = create_token(Token::String, string, line, ss.pos - column)
tokens << token
when ss.scan(/\/\*/)
# TODO: Need to keep comment?
line = lex_comment(ss, line, lines, "")
when ss.scan(/\/\//)
line = lex_line_comment(ss, line, "")
when ss.scan(/'(.)'/)
tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
when ss.scan(/'\\(.)'/) # '\\', '\t'
tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
when ss.scan(/'\\(\d+)'/) # '\13'
tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
when ss.scan(/%empty/)
# skip
when @scanner.scan(/\n/)
@line += 1
@head = @scanner.pos + 1
when @scanner.scan(/\*\//)
return
else
l = line - lines.first[1]
split = ss.string.split("\n")
col = ss.pos - split[0...l].join("\n").length
raise "Parse error (unknown token): #{split[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{col})"
@scanner.getch
end
end
end
def lex_bison_declarations_tokens
lex_common(@bison_declarations, @bison_declarations_tokens)
end
def lex_user_code(ss, line, column, lines)
first_line = line
first_column = column
debug("Enter lex_user_code: #{line}")
brace_count = 1
str = "{"
# Array of [type, $n, tag, first column, last column]
# TODO: Is it better to keep string, like "$$", and use gsub?
references = []
while !ss.eos? do
case
when ss.scan(/\n/)
line += 1
when ss.scan(/"/)
string, line = lex_string(ss, "\"", line, lines)
str << string
next
when ss.scan(/'/)
string, line = lex_string(ss, "'", line, lines)
str << string
next
# $ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when ss.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
references << [:dollar, "$", tag, str.length, str.length + ss[0].length - 1]
when ss.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
references << [:dollar, Integer(ss[2]), tag, str.length, str.length + ss[0].length - 1]
when ss.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
references << [:dollar, ss[2], tag, str.length, str.length + ss[0].length - 1]
when ss.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
references << [:dollar, ss[2], tag, str.length, str.length + ss[0].length - 1]
# @ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when ss.scan(/@\$/) # @$
references << [:at, "$", nil, str.length, str.length + ss[0].length - 1]
when ss.scan(/@(\d+)/) # @1
references << [:at, Integer(ss[1]), nil, str.length, str.length + ss[0].length - 1]
when ss.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
references << [:at, ss[1], nil, str.length, str.length + ss[0].length - 1]
when ss.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
references << [:at, ss[1], nil, str.length, str.length + ss[0].length - 1]
when ss.scan(/{/)
brace_count += 1
when ss.scan(/}/)
brace_count -= 1
debug("Return lex_user_code: #{line}")
if brace_count == 0
str << ss[0]
user_code = Token.new(type: Token::User_code, s_value: str.freeze)
user_code.line = first_line
user_code.column = first_column
user_code.references = references
return [user_code, line]
end
when ss.scan(/\/\*/)
str << ss[0]
line = lex_comment(ss, line, lines, str)
when ss.scan(/\/\//)
str << ss[0]
line = lex_line_comment(ss, line, str)
else
# noop, just consume char
str << ss.getch
next
end
str << ss[0]
def build_token(type:, s_value:, **options)
token = Token.new(type: type, s_value: s_value)
token.line = @head_line
token.column = @head_column
options.each do |attr, value|
token.public_send("#{attr}=", value)
end
# Reach to end of input but brace does not match
l = line - lines.first[1]
raise "Parse error (brace mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
token
end
def lex_string(ss, terminator, line, lines)
debug("Enter lex_string: #{line}")
str = terminator.dup
while (c = ss.getch) do
str << c
case c
when "\n"
line += 1
when terminator
debug("Return lex_string: #{line}")
return [str, line]
else
# noop
end
end
# Reach to end of input but quote does not match
l = line - lines.first[1]
raise "Parse error (quote mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
end
# /* */ style comment
def lex_comment(ss, line, lines, str)
while !ss.eos? do
case
when ss.scan(/\n/)
line += 1
when ss.scan(/\*\//)
return line
else
str << ss.getch
next
end
str << ss[0]
end
# Reach to end of input but quote does not match
l = line - lines.first[1]
raise "Parse error (comment mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
end
# // style comment
def lex_line_comment(ss, line, str)
while !ss.eos? do
case
when ss.scan(/\n/)
return line + 1
else
str << ss.getch
next
end
end
line # Reach to end of input
end
def lex_grammar_rules_tokens
lex_common(@grammar_rules, @grammar_rules_tokens)
end
def debug(msg)
return unless @debug
puts "#{msg}\n"
def newline
@line += 1
@head = @scanner.pos + 1
end
end
end

View File

@ -52,12 +52,16 @@ module Lrama
Usage: lrama [options] FILE
BANNER
o.separator ''
o.separator 'STDIN mode:'
o.separator 'lrama [options] - FILE read grammar from STDIN'
o.separator ''
o.separator 'Tuning the Parser:'
o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v }
o.on('-t', 'reserved, do nothing') { }
o.separator ''
o.separator 'Output:'
o.on('-h', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v }
o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v }
o.on('-h=[FILE]', 'also produce a header file named FILE (deprecated)') {|v| @options.header = true; @options.header_file = v }
o.on('-d', 'also produce a header file') { @options.header = true }
o.on('-r', '--report=THINGS', Array, 'also produce details on the automaton') {|v| @report = v }
o.on('--report-file=FILE', 'also produce details on the automaton output to a file named FILE') {|v| @options.report_file = v }

View File

@ -7,7 +7,7 @@ module Lrama
extend Forwardable
include Report::Duration
attr_reader :grammar_file_path, :context, :grammar, :error_recovery
attr_reader :grammar_file_path, :context, :grammar, :error_recovery, :include_header
def_delegators "@context", :yyfinal, :yylast, :yyntokens, :yynnts, :yynrules, :yynstates,
:yymaxutok, :yypact_ninf, :yytable_ninf
@ -28,6 +28,7 @@ module Lrama
@context = context
@grammar = grammar
@error_recovery = error_recovery
@include_header = header_file_path ? header_file_path.sub("./", "") : nil
end
if ERB.instance_method(:initialize).parameters.last.first == :key
@ -40,11 +41,8 @@ module Lrama
end
end
def eval_template(file, path)
erb = self.class.erb(File.read(file))
erb.filename = file
tmp = erb.result_with_hash(context: @context, output: self)
replace_special_variables(tmp, path)
def render_partial(file)
render_template(partial_file(file))
end
def render
@ -143,7 +141,7 @@ module Lrama
str << <<-STR
case #{sym.enum_name}: /* #{sym.comment} */
#line #{sym.printer.lineno} "#{@grammar_file_path}"
#{sym.printer.translated_code(sym.tag)}
{#{sym.printer.translated_code(sym.tag)}}
#line [@oline@] [@ofile@]
break;
@ -160,7 +158,7 @@ module Lrama
<<-STR
#{comment}
#line #{@grammar.initial_action.line} "#{@grammar_file_path}"
#{@grammar.initial_action.translated_code}
{#{@grammar.initial_action.translated_code}}
STR
end
@ -173,7 +171,7 @@ module Lrama
str << <<-STR
case #{sym.enum_name}: /* #{sym.comment} */
#line #{sym.error_token.lineno} "#{@grammar_file_path}"
#{sym.error_token.translated_code(sym.tag)}
{#{sym.error_token.translated_code(sym.tag)}}
#line [@oline@] [@ofile@]
break;
@ -190,14 +188,13 @@ module Lrama
@context.states.rules.each do |rule|
next unless rule.code
rule = rule
code = rule.code
spaces = " " * (code.column - 1)
str << <<-STR
case #{rule.id + 1}: /* #{rule.as_comment} */
#line #{code.line} "#{@grammar_file_path}"
#{spaces}#{rule.translated_code}
#{spaces}{#{rule.translated_code}}
#line [@oline@] [@ofile@]
break;
@ -212,14 +209,14 @@ module Lrama
str
end
def omit_braces_and_blanks(param)
param[1..-2].strip
def omit_blanks(param)
param.strip
end
# b4_parse_param
def parse_param
if @grammar.parse_param
omit_braces_and_blanks(@grammar.parse_param)
omit_blanks(@grammar.parse_param)
else
""
end
@ -227,7 +224,7 @@ module Lrama
def lex_param
if @grammar.lex_param
omit_braces_and_blanks(@grammar.lex_param)
omit_blanks(@grammar.lex_param)
else
""
end
@ -354,6 +351,17 @@ module Lrama
private
def eval_template(file, path)
tmp = render_template(file)
replace_special_variables(tmp, path)
end
def render_template(file)
erb = self.class.erb(File.read(file))
erb.filename = file
erb.result_with_hash(context: @context, output: self)
end
def template_file
File.join(template_dir, @template_name)
end
@ -362,6 +370,10 @@ module Lrama
File.join(template_dir, "bison/yacc.h")
end
def partial_file(file)
File.join(template_dir, file)
end
def template_dir
File.expand_path("../../../template", __FILE__)
end

File diff suppressed because it is too large Load Diff

View File

@ -1,56 +0,0 @@
module Lrama
class Parser
class TokenScanner
def initialize(tokens)
@tokens = tokens
@index = 0
end
def current_token
@tokens[@index]
end
def current_type
current_token&.type
end
def previous_token
@tokens[@index - 1]
end
def next
token = current_token
@index += 1
return token
end
def consume(*token_types)
if token_types.include?(current_type)
return self.next
end
return nil
end
def consume!(*token_types)
consume(*token_types) || (raise "#{token_types} is expected but #{current_type}. #{current_token}")
end
def consume_multi(*token_types)
a = []
while token_types.include?(current_type)
a << self.next
end
raise "No token is consumed. #{token_types}" if a.empty?
return a
end
def eots?
current_token.nil?
end
end
end
end

View File

@ -1,3 +1,3 @@
module Lrama
VERSION = "0.5.6".freeze
VERSION = "0.5.7".freeze
end

View File

@ -0,0 +1,71 @@
<%# b4_shared_declarations -%>
<%-# b4_cpp_guard_open([b4_spec_mapped_header_file]) -%>
<%- if output.spec_mapped_header_file -%>
#ifndef <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
# define <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
<%- end -%>
<%-# b4_declare_yydebug & b4_YYDEBUG_define -%>
/* Debug traces. */
#ifndef YYDEBUG
# define YYDEBUG 0
#endif
#if YYDEBUG && !defined(yydebug)
extern int yydebug;
#endif
<%-# b4_percent_code_get([[requires]]). %code is not supported -%>
<%-# b4_token_enums_defines -%>
/* Token kinds. */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
enum yytokentype
{
<%= output.token_enums -%>
};
typedef enum yytokentype yytoken_kind_t;
#endif
<%-# b4_declare_yylstype -%>
<%-# b4_value_type_define -%>
/* Value type. */
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
union YYSTYPE
{
#line <%= output.grammar.union.lineno %> "<%= output.grammar_file_path %>"
<%= output.grammar.union.braces_less_code %>
#line [@oline@] [@ofile@]
};
typedef union YYSTYPE YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define YYSTYPE_IS_DECLARED 1
#endif
<%-# b4_location_type_define -%>
/* Location type. */
#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED
typedef struct YYLTYPE YYLTYPE;
struct YYLTYPE
{
int first_line;
int first_column;
int last_line;
int last_column;
};
# define YYLTYPE_IS_DECLARED 1
# define YYLTYPE_IS_TRIVIAL 1
#endif
<%-# b4_declare_yyerror_and_yylex. Not supported -%>
<%-# b4_declare_yyparse -%>
int yyparse (<%= output.parse_param %>);
<%-# b4_percent_code_get([[provides]]). %code is not supported -%>
<%-# b4_cpp_guard_close([b4_spec_mapped_header_file]) -%>
<%- if output.spec_mapped_header_file -%>
#endif /* !<%= output.b4_cpp_guard__b4_spec_mapped_header_file %> */
<%- end -%>

View File

@ -101,79 +101,13 @@
# endif
<%# b4_header_include_if -%>
<%- if output.include_header -%>
#include "<%= output.include_header %>"
<%- else -%>
/* Use api.header.include to #include this header
instead of duplicating it here. */
<%# b4_shared_declarations -%>
<%-# b4_cpp_guard_open([b4_spec_mapped_header_file]) -%>
<%- if output.spec_mapped_header_file -%>
#ifndef <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
# define <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
<%- end -%>
<%-# b4_declare_yydebug & b4_YYDEBUG_define -%>
/* Debug traces. */
#ifndef YYDEBUG
# define YYDEBUG 0
#endif
#if YYDEBUG && !defined(yydebug)
extern int yydebug;
#endif
<%-# b4_percent_code_get([[requires]]). %code is not supported -%>
<%-# b4_token_enums_defines -%>
/* Token kinds. */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
enum yytokentype
{
<%= output.token_enums -%>
};
typedef enum yytokentype yytoken_kind_t;
#endif
<%-# b4_declare_yylstype -%>
<%-# b4_value_type_define -%>
/* Value type. */
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
union YYSTYPE
{
#line <%= output.grammar.union.lineno %> "<%= output.grammar_file_path %>"
<%= output.grammar.union.braces_less_code %>
#line [@oline@] [@ofile@]
};
typedef union YYSTYPE YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define YYSTYPE_IS_DECLARED 1
#endif
<%-# b4_location_type_define -%>
/* Location type. */
#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED
typedef struct YYLTYPE YYLTYPE;
struct YYLTYPE
{
int first_line;
int first_column;
int last_line;
int last_column;
};
# define YYLTYPE_IS_DECLARED 1
# define YYLTYPE_IS_TRIVIAL 1
#endif
<%-# b4_declare_yyerror_and_yylex. Not supported -%>
<%-# b4_declare_yyparse -%>
int yyparse (<%= output.parse_param %>);
<%-# b4_percent_code_get([[provides]]). %code is not supported -%>
<%-# b4_cpp_guard_close([b4_spec_mapped_header_file]) -%>
<%- if output.spec_mapped_header_file -%>
#endif /* !<%= output.b4_cpp_guard__b4_spec_mapped_header_file %> */
<%- end -%>
<%= output.render_partial("bison/_yacc.h") %>
<%- end -%>
<%# b4_declare_symbol_enum -%>
/* Symbol kind. */
enum yysymbol_kind_t
@ -2114,3 +2048,4 @@ yyreturnlab:
#line <%= output.aux.epilogue_first_lineno - 1 %> "<%= output.grammar_file_path %>"
<%= output.aux.epilogue -%>

View File

@ -37,76 +37,4 @@
/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual,
especially those whose name start with YY_ or yy_. They are
private implementation details that can be changed or removed. */
<%# b4_shared_declarations -%>
<%# b4_shared_declarations -%>
<%-# b4_cpp_guard_open([b4_spec_mapped_header_file]) -%>
<%- if output.spec_mapped_header_file -%>
#ifndef <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
# define <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
<%- end -%>
<%-# b4_declare_yydebug & b4_YYDEBUG_define -%>
/* Debug traces. */
#ifndef YYDEBUG
# define YYDEBUG 0
#endif
#if YYDEBUG
extern int yydebug;
#endif
<%-# b4_percent_code_get([[requires]]). %code is not supported -%>
<%-# b4_token_enums_defines -%>
/* Token kinds. */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
enum yytokentype
{
<%= output.token_enums -%>
};
typedef enum yytokentype yytoken_kind_t;
#endif
<%-# b4_declare_yylstype -%>
<%-# b4_value_type_define -%>
/* Value type. */
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
union YYSTYPE
{
#line <%= output.grammar.union.lineno %> "<%= output.grammar_file_path %>"
<%= output.grammar.union.braces_less_code %>
#line [@oline@] [@ofile@]
};
typedef union YYSTYPE YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define YYSTYPE_IS_DECLARED 1
#endif
<%-# b4_location_type_define -%>
/* Location type. */
#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED
typedef struct YYLTYPE YYLTYPE;
struct YYLTYPE
{
int first_line;
int first_column;
int last_line;
int last_column;
};
# define YYLTYPE_IS_DECLARED 1
# define YYLTYPE_IS_TRIVIAL 1
#endif
<%-# b4_declare_yyerror_and_yylex. Not supported -%>
<%-# b4_declare_yyparse -%>
int yyparse (<%= output.parse_param %>);
<%-# b4_percent_code_get([[provides]]). %code is not supported -%>
<%-# b4_cpp_guard_close([b4_spec_mapped_header_file]) -%>
<%- if output.spec_mapped_header_file -%>
#endif /* !<%= output.b4_cpp_guard__b4_spec_mapped_header_file %> */
<%- end -%>
<%= output.render_partial("bison/_yacc.h") %>