Lrama v0.5.7

This commit is contained in:
yui-knk 2023-10-23 09:07:23 +09:00 committed by Yuichiro Kaneko
parent dd9b4851a3
commit 2d468358a5
14 changed files with 2224 additions and 812 deletions

View File

@ -6,8 +6,9 @@ module Lrama
Report::Duration.enable if options.trace_opts[:time]
warning = Lrama::Warning.new
grammar = Lrama::Parser.new(options.y.read).parse
text = options.y.read
options.y.close if options.y != STDIN
grammar = Lrama::Parser.new(text).parse
states = Lrama::States.new(grammar, warning, trace_state: (options.trace_opts[:automaton] || options.trace_opts[:closure]))
states.compute
context = Lrama::Context.new(states)

View File

@ -170,7 +170,7 @@ module Lrama
return a
end
# Mapping from rule number to lenght of RHS.
# Mapping from rule number to length of RHS.
# Dummy rule is appended as the first element whose value is 0
# because 0 means error in yydefact.
def yyr2
@ -214,7 +214,7 @@ module Lrama
(rule_id + 1) * -1
end
# Symbol number is assinged to term first then nterm.
# Symbol number is assigned to term first then nterm.
# This method calculates sequence_number for nterm.
def nterm_number_to_sequence_number(nterm_number)
nterm_number - @states.terms.count
@ -259,7 +259,7 @@ module Lrama
actions[conflict.symbol.number] = ErrorActionNumber
end
# If default_reduction_rule, replase default_reduction_rule in
# If default_reduction_rule, replace default_reduction_rule in
# actions with zero.
if state.default_reduction_rule
actions.map! do |e|
@ -272,7 +272,7 @@ module Lrama
end
# If no default_reduction_rule, default behavior is an
# error then replase ErrorActionNumber with zero.
# error then replace ErrorActionNumber with zero.
if !state.default_reduction_rule
actions.map! do |e|
if e == ErrorActionNumber

View File

@ -40,8 +40,7 @@ module Lrama
end
if @h[x] == d
while true do
z = @stack.pop
while (z = @stack.pop) do
@h[z] = Float::INFINITY
break if z == x
@result[z] = @result[x] # F (Top of S) = F x

View File

@ -1,3 +1,5 @@
require "strscan"
require "lrama/grammar/auxiliary"
require "lrama/grammar/code"
require "lrama/grammar/error_token"
@ -306,6 +308,188 @@ module Lrama
@nterms ||= @symbols.select(&:nterm?)
end
def extract_references
unless initial_action.nil?
scanner = StringScanner.new(initial_action.s_value)
references = []
while !scanner.eos? do
start = scanner.pos
case
# $ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, "$", tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
# @ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when scanner.scan(/@\$/) # @$
references << [:at, "$", nil, start, scanner.pos - 1]
when scanner.scan(/@(\d+)/) # @1
references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
references << [:at, scanner[1], nil, start, scanner.pos - 1]
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
references << [:at, scanner[1], nil, start, scanner.pos - 1]
else
scanner.getch
end
end
initial_action.token_code.references = references
build_references(initial_action.token_code)
end
@printers.each do |printer|
scanner = StringScanner.new(printer.code.s_value)
references = []
while !scanner.eos? do
start = scanner.pos
case
# $ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, "$", tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
# @ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when scanner.scan(/@\$/) # @$
references << [:at, "$", nil, start, scanner.pos - 1]
when scanner.scan(/@(\d+)/) # @1
references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
references << [:at, scanner[1], nil, start, scanner.pos - 1]
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
references << [:at, scanner[1], nil, start, scanner.pos - 1]
else
scanner.getch
end
end
printer.code.token_code.references = references
build_references(printer.code.token_code)
end
@error_tokens.each do |error_token|
scanner = StringScanner.new(error_token.code.s_value)
references = []
while !scanner.eos? do
start = scanner.pos
case
# $ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, "$", tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
# @ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when scanner.scan(/@\$/) # @$
references << [:at, "$", nil, start, scanner.pos - 1]
when scanner.scan(/@(\d+)/) # @1
references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
references << [:at, scanner[1], nil, start, scanner.pos - 1]
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
references << [:at, scanner[1], nil, start, scanner.pos - 1]
else
scanner.getch
end
end
error_token.code.token_code.references = references
build_references(error_token.code.token_code)
end
@_rules.each do |lhs, rhs, _|
rhs.each_with_index do |token, index|
next if token.class == Lrama::Grammar::Symbol || token.type != Lrama::Lexer::Token::User_code
scanner = StringScanner.new(token.s_value)
references = []
while !scanner.eos? do
start = scanner.pos
case
# $ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, "$", tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
references << [:dollar, scanner[2], tag, start, scanner.pos - 1]
# @ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when scanner.scan(/@\$/) # @$
references << [:at, "$", nil, start, scanner.pos - 1]
when scanner.scan(/@(\d+)/) # @1
references << [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
references << [:at, scanner[1], nil, start, scanner.pos - 1]
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
references << [:at, scanner[1], nil, start, scanner.pos - 1]
when scanner.scan(/\/\*/)
scanner.scan_until(/\*\//)
else
scanner.getch
end
end
token.references = references
token.numberize_references(lhs, rhs)
build_references(token)
end
end
end
def create_token(type, s_value, line, column)
t = Token.new(type: type, s_value: s_value)
t.line = line
t.column = column
return t
end
private
def find_nterm_by_id!(id)
@ -470,7 +654,9 @@ module Lrama
# Fill #number and #token_id
def fill_symbol_number
# TODO: why start from 256
# Character literal in grammar file has
# token id corresponding to ASCII code by default,
# so start token_id from 256.
token_id = 256
# YYEMPTY = -2

View File

@ -2,8 +2,8 @@ module Lrama
class Grammar
class Union < Struct.new(:code, :lineno, keyword_init: true)
def braces_less_code
# Remove braces
code.s_value[1..-2]
# Braces are already removed by the lexer
code.s_value
end
end
end

View File

@ -1,346 +1,174 @@
require "strscan"
require "lrama/report/duration"
require "lrama/lexer/token"
module Lrama
# Lexer for parse.y
class Lexer
include Lrama::Report::Duration
attr_accessor :status
attr_accessor :end_symbol
# States
#
# See: https://www.gnu.org/software/bison/manual/html_node/Grammar-Outline.html
Initial = 0
Prologue = 1
BisonDeclarations = 2
GrammarRules = 3
Epilogue = 4
# Token types
attr_reader :prologue, :bison_declarations, :grammar_rules, :epilogue,
:bison_declarations_tokens, :grammar_rules_tokens
SYMBOLS = %w(%{ %} %% { } \[ \] : \| ;)
PERCENT_TOKENS = %w(
%union
%token
%type
%left
%right
%nonassoc
%expect
%define
%require
%printer
%lex-param
%parse-param
%initial-action
%precedence
%prec
%error-token
)
def initialize(text)
@text = text
@state = Initial
# Array of texts
@prologue = []
@bison_declarations = []
@grammar_rules = []
@epilogue = []
@scanner = StringScanner.new(text)
@head = @scanner.pos
@line = 1
@status = :initial
@end_symbol = nil
end
@bison_declarations_tokens = []
@grammar_rules_tokens = []
@debug = false
report_duration(:lex) do
lex_text
lex_bison_declarations_tokens
lex_grammar_rules_tokens
def next_token
case @status
when :initial
lex_token
when :c_declaration
lex_c_code
end
end
def line
@line
end
def column
@scanner.pos - @head
end
def lex_token
while !@scanner.eos? do
case
when @scanner.scan(/\n/)
newline
when @scanner.scan(/\s+/)
# noop
when @scanner.scan(/\/\*/)
lex_comment
when @scanner.scan(/\/\//)
@scanner.scan_until(/\n/)
newline
when @scanner.scan(/%empty/)
# noop
else
break
end
end
@head_line = line
@head_column = column
case
when @scanner.eos?
return
when @scanner.scan(/#{SYMBOLS.join('|')}/)
return [@scanner.matched, @scanner.matched]
when @scanner.scan(/#{PERCENT_TOKENS.join('|')}/)
return [@scanner.matched, @scanner.matched]
when @scanner.scan(/<\w+>/)
return [:TAG, build_token(type: Token::Tag, s_value: @scanner.matched)]
when @scanner.scan(/'.'/)
return [:CHARACTER, build_token(type: Token::Char, s_value: @scanner.matched)]
when @scanner.scan(/'\\\\'|'\\b'|'\\t'|'\\f'|'\\r'|'\\n'|'\\v'|'\\13'/)
return [:CHARACTER, build_token(type: Token::Char, s_value: @scanner.matched)]
when @scanner.scan(/"/)
return [:STRING, %Q("#{@scanner.scan_until(/"/)})]
when @scanner.scan(/\d+/)
return [:INTEGER, Integer(@scanner.matched)]
when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
token = build_token(type: Token::Ident, s_value: @scanner.matched)
type =
if @scanner.check(/\s*(\[\s*[a-zA-Z_.][-a-zA-Z0-9_.]*\s*\])?\s*:/)
:IDENT_COLON
else
:IDENTIFIER
end
return [type, token]
else
raise
end
end
def lex_c_code
nested = 0
code = ''
while !@scanner.eos? do
case
when @scanner.scan(/{/)
code += @scanner.matched
nested += 1
when @scanner.scan(/}/)
if nested == 0 && @end_symbol == '}'
@scanner.unscan
return [:C_DECLARATION, build_token(type: Token::User_code, s_value: code, references: [])]
else
code += @scanner.matched
nested -= 1
end
when @scanner.check(/#{@end_symbol}/)
return [:C_DECLARATION, build_token(type: Token::User_code, s_value: code, references: [])]
when @scanner.scan(/\n/)
code += @scanner.matched
newline
when @scanner.scan(/"/)
matched = @scanner.scan_until(/"/)
code += %Q("#{matched})
@line += matched.count("\n")
when @scanner.scan(/'/)
matched = @scanner.scan_until(/'/)
code += %Q('#{matched})
else
code += @scanner.getch
end
end
raise
end
private
def create_token(type, s_value, line, column)
t = Token.new(type: type, s_value: s_value)
t.line = line
t.column = column
return t
end
# TODO: Remove this
def lex_text
@text.each_line.with_index(1) do |string, lineno|
case @state
when Initial
# Skip until "%{"
if string == "%{\n"
@state = Prologue
@prologue << ["", lineno]
next
end
when Prologue
# Between "%{" and "%}"
if string == "%}\n"
@state = BisonDeclarations
@prologue << ["", lineno]
next
end
@prologue << [string, lineno]
when BisonDeclarations
if string == "%%\n"
@state = GrammarRules
next
end
@bison_declarations << [string, lineno]
when GrammarRules
# Between "%%" and "%%"
if string == "%%\n"
@state = Epilogue
next
end
@grammar_rules << [string, lineno]
when Epilogue
@epilogue << [string, lineno]
else
raise "Unknown state: #{@state}"
end
end
end
# See:
# * https://www.gnu.org/software/bison/manual/html_node/Decl-Summary.html
# * https://www.gnu.org/software/bison/manual/html_node/Symbol-Decls.html
# * https://www.gnu.org/software/bison/manual/html_node/Empty-Rules.html
def lex_common(lines, tokens)
line = lines.first[1]
column = 0
ss = StringScanner.new(lines.map(&:first).join)
while !ss.eos? do
def lex_comment
while !@scanner.eos? do
case
when ss.scan(/\n/)
line += 1
column = ss.pos
when ss.scan(/\s+/)
# skip
when ss.scan(/;/)
tokens << create_token(Token::Semicolon, ss[0], line, ss.pos - column)
when ss.scan(/\|/)
tokens << create_token(Token::Bar, ss[0], line, ss.pos - column)
when ss.scan(/(\d+)/)
tokens << create_token(Token::Number, Integer(ss[0]), line, ss.pos - column)
when ss.scan(/(<[a-zA-Z0-9_]+>)/)
tokens << create_token(Token::Tag, ss[0], line, ss.pos - column)
when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]\s*:/)
tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
tokens << create_token(Token::Named_Ref, ss[2], line, ss.pos - column)
when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)\s*:/)
tokens << create_token(Token::Ident_Colon, ss[1], line, ss.pos - column)
when ss.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
tokens << create_token(Token::Ident, ss[0], line, ss.pos - column)
when ss.scan(/\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/)
tokens << create_token(Token::Named_Ref, ss[1], line, ss.pos - column)
when ss.scan(/%expect/)
tokens << create_token(Token::P_expect, ss[0], line, ss.pos - column)
when ss.scan(/%define/)
tokens << create_token(Token::P_define, ss[0], line, ss.pos - column)
when ss.scan(/%printer/)
tokens << create_token(Token::P_printer, ss[0], line, ss.pos - column)
when ss.scan(/%error-token/)
tokens << create_token(Token::P_error_token, ss[0], line, ss.pos - column)
when ss.scan(/%lex-param/)
tokens << create_token(Token::P_lex_param, ss[0], line, ss.pos - column)
when ss.scan(/%parse-param/)
tokens << create_token(Token::P_parse_param, ss[0], line, ss.pos - column)
when ss.scan(/%initial-action/)
tokens << create_token(Token::P_initial_action, ss[0], line, ss.pos - column)
when ss.scan(/%union/)
tokens << create_token(Token::P_union, ss[0], line, ss.pos - column)
when ss.scan(/%token/)
tokens << create_token(Token::P_token, ss[0], line, ss.pos - column)
when ss.scan(/%type/)
tokens << create_token(Token::P_type, ss[0], line, ss.pos - column)
when ss.scan(/%nonassoc/)
tokens << create_token(Token::P_nonassoc, ss[0], line, ss.pos - column)
when ss.scan(/%left/)
tokens << create_token(Token::P_left, ss[0], line, ss.pos - column)
when ss.scan(/%right/)
tokens << create_token(Token::P_right, ss[0], line, ss.pos - column)
when ss.scan(/%precedence/)
tokens << create_token(Token::P_precedence, ss[0], line, ss.pos - column)
when ss.scan(/%prec/)
tokens << create_token(Token::P_prec, ss[0], line, ss.pos - column)
when ss.scan(/{/)
token, line = lex_user_code(ss, line, ss.pos - column, lines)
tokens << token
when ss.scan(/"/)
string, line = lex_string(ss, "\"", line, lines)
token = create_token(Token::String, string, line, ss.pos - column)
tokens << token
when ss.scan(/\/\*/)
# TODO: Need to keep comment?
line = lex_comment(ss, line, lines, "")
when ss.scan(/\/\//)
line = lex_line_comment(ss, line, "")
when ss.scan(/'(.)'/)
tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
when ss.scan(/'\\(.)'/) # '\\', '\t'
tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
when ss.scan(/'\\(\d+)'/) # '\13'
tokens << create_token(Token::Char, ss[0], line, ss.pos - column)
when ss.scan(/%empty/)
# skip
when @scanner.scan(/\n/)
@line += 1
@head = @scanner.pos + 1
when @scanner.scan(/\*\//)
return
else
l = line - lines.first[1]
split = ss.string.split("\n")
col = ss.pos - split[0...l].join("\n").length
raise "Parse error (unknown token): #{split[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{col})"
@scanner.getch
end
end
end
def lex_bison_declarations_tokens
lex_common(@bison_declarations, @bison_declarations_tokens)
end
def lex_user_code(ss, line, column, lines)
first_line = line
first_column = column
debug("Enter lex_user_code: #{line}")
brace_count = 1
str = "{"
# Array of [type, $n, tag, first column, last column]
# TODO: Is it better to keep string, like "$$", and use gsub?
references = []
while !ss.eos? do
case
when ss.scan(/\n/)
line += 1
when ss.scan(/"/)
string, line = lex_string(ss, "\"", line, lines)
str << string
next
when ss.scan(/'/)
string, line = lex_string(ss, "'", line, lines)
str << string
next
# $ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when ss.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
references << [:dollar, "$", tag, str.length, str.length + ss[0].length - 1]
when ss.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
references << [:dollar, Integer(ss[2]), tag, str.length, str.length + ss[0].length - 1]
when ss.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
references << [:dollar, ss[2], tag, str.length, str.length + ss[0].length - 1]
when ss.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
tag = ss[1] ? create_token(Token::Tag, ss[1], line, str.length) : nil
references << [:dollar, ss[2], tag, str.length, str.length + ss[0].length - 1]
# @ references
# It needs to wrap an identifier with brackets to use ".-" for identifiers
when ss.scan(/@\$/) # @$
references << [:at, "$", nil, str.length, str.length + ss[0].length - 1]
when ss.scan(/@(\d+)/) # @1
references << [:at, Integer(ss[1]), nil, str.length, str.length + ss[0].length - 1]
when ss.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
references << [:at, ss[1], nil, str.length, str.length + ss[0].length - 1]
when ss.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
references << [:at, ss[1], nil, str.length, str.length + ss[0].length - 1]
when ss.scan(/{/)
brace_count += 1
when ss.scan(/}/)
brace_count -= 1
debug("Return lex_user_code: #{line}")
if brace_count == 0
str << ss[0]
user_code = Token.new(type: Token::User_code, s_value: str.freeze)
user_code.line = first_line
user_code.column = first_column
user_code.references = references
return [user_code, line]
end
when ss.scan(/\/\*/)
str << ss[0]
line = lex_comment(ss, line, lines, str)
when ss.scan(/\/\//)
str << ss[0]
line = lex_line_comment(ss, line, str)
else
# noop, just consume char
str << ss.getch
next
end
str << ss[0]
def build_token(type:, s_value:, **options)
token = Token.new(type: type, s_value: s_value)
token.line = @head_line
token.column = @head_column
options.each do |attr, value|
token.public_send("#{attr}=", value)
end
# Reach to end of input but brace does not match
l = line - lines.first[1]
raise "Parse error (brace mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
token
end
def lex_string(ss, terminator, line, lines)
debug("Enter lex_string: #{line}")
str = terminator.dup
while (c = ss.getch) do
str << c
case c
when "\n"
line += 1
when terminator
debug("Return lex_string: #{line}")
return [str, line]
else
# noop
end
end
# Reach to end of input but quote does not match
l = line - lines.first[1]
raise "Parse error (quote mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
end
# /* */ style comment
def lex_comment(ss, line, lines, str)
while !ss.eos? do
case
when ss.scan(/\n/)
line += 1
when ss.scan(/\*\//)
return line
else
str << ss.getch
next
end
str << ss[0]
end
# Reach to end of input but quote does not match
l = line - lines.first[1]
raise "Parse error (comment mismatch): #{ss.string.split("\n")[l]} \"#{ss.string[ss.pos]}\" (#{line}: #{ss.pos})"
end
# // style comment
def lex_line_comment(ss, line, str)
while !ss.eos? do
case
when ss.scan(/\n/)
return line + 1
else
str << ss.getch
next
end
end
line # Reach to end of input
end
def lex_grammar_rules_tokens
lex_common(@grammar_rules, @grammar_rules_tokens)
end
def debug(msg)
return unless @debug
puts "#{msg}\n"
def newline
@line += 1
@head = @scanner.pos + 1
end
end
end

View File

@ -52,12 +52,16 @@ module Lrama
Usage: lrama [options] FILE
BANNER
o.separator ''
o.separator 'STDIN mode:'
o.separator 'lrama [options] - FILE read grammar from STDIN'
o.separator ''
o.separator 'Tuning the Parser:'
o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v }
o.on('-t', 'reserved, do nothing') { }
o.separator ''
o.separator 'Output:'
o.on('-h', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v }
o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v }
o.on('-h=[FILE]', 'also produce a header file named FILE (deprecated)') {|v| @options.header = true; @options.header_file = v }
o.on('-d', 'also produce a header file') { @options.header = true }
o.on('-r', '--report=THINGS', Array, 'also produce details on the automaton') {|v| @report = v }
o.on('--report-file=FILE', 'also produce details on the automaton output to a file named FILE') {|v| @options.report_file = v }

View File

@ -7,7 +7,7 @@ module Lrama
extend Forwardable
include Report::Duration
attr_reader :grammar_file_path, :context, :grammar, :error_recovery
attr_reader :grammar_file_path, :context, :grammar, :error_recovery, :include_header
def_delegators "@context", :yyfinal, :yylast, :yyntokens, :yynnts, :yynrules, :yynstates,
:yymaxutok, :yypact_ninf, :yytable_ninf
@ -28,6 +28,7 @@ module Lrama
@context = context
@grammar = grammar
@error_recovery = error_recovery
@include_header = header_file_path ? header_file_path.sub("./", "") : nil
end
if ERB.instance_method(:initialize).parameters.last.first == :key
@ -40,11 +41,8 @@ module Lrama
end
end
def eval_template(file, path)
erb = self.class.erb(File.read(file))
erb.filename = file
tmp = erb.result_with_hash(context: @context, output: self)
replace_special_variables(tmp, path)
def render_partial(file)
render_template(partial_file(file))
end
def render
@ -143,7 +141,7 @@ module Lrama
str << <<-STR
case #{sym.enum_name}: /* #{sym.comment} */
#line #{sym.printer.lineno} "#{@grammar_file_path}"
#{sym.printer.translated_code(sym.tag)}
{#{sym.printer.translated_code(sym.tag)}}
#line [@oline@] [@ofile@]
break;
@ -160,7 +158,7 @@ module Lrama
<<-STR
#{comment}
#line #{@grammar.initial_action.line} "#{@grammar_file_path}"
#{@grammar.initial_action.translated_code}
{#{@grammar.initial_action.translated_code}}
STR
end
@ -173,7 +171,7 @@ module Lrama
str << <<-STR
case #{sym.enum_name}: /* #{sym.comment} */
#line #{sym.error_token.lineno} "#{@grammar_file_path}"
#{sym.error_token.translated_code(sym.tag)}
{#{sym.error_token.translated_code(sym.tag)}}
#line [@oline@] [@ofile@]
break;
@ -190,14 +188,13 @@ module Lrama
@context.states.rules.each do |rule|
next unless rule.code
rule = rule
code = rule.code
spaces = " " * (code.column - 1)
str << <<-STR
case #{rule.id + 1}: /* #{rule.as_comment} */
#line #{code.line} "#{@grammar_file_path}"
#{spaces}#{rule.translated_code}
#{spaces}{#{rule.translated_code}}
#line [@oline@] [@ofile@]
break;
@ -212,14 +209,14 @@ module Lrama
str
end
def omit_braces_and_blanks(param)
param[1..-2].strip
def omit_blanks(param)
param.strip
end
# b4_parse_param
def parse_param
if @grammar.parse_param
omit_braces_and_blanks(@grammar.parse_param)
omit_blanks(@grammar.parse_param)
else
""
end
@ -227,7 +224,7 @@ module Lrama
def lex_param
if @grammar.lex_param
omit_braces_and_blanks(@grammar.lex_param)
omit_blanks(@grammar.lex_param)
else
""
end
@ -354,6 +351,17 @@ module Lrama
private
def eval_template(file, path)
tmp = render_template(file)
replace_special_variables(tmp, path)
end
def render_template(file)
erb = self.class.erb(File.read(file))
erb.filename = file
erb.result_with_hash(context: @context, output: self)
end
def template_file
File.join(template_dir, @template_name)
end
@ -362,6 +370,10 @@ module Lrama
File.join(template_dir, "bison/yacc.h")
end
def partial_file(file)
File.join(template_dir, file)
end
def template_dir
File.expand_path("../../../template", __FILE__)
end

File diff suppressed because it is too large Load Diff

View File

@ -1,56 +0,0 @@
module Lrama
class Parser
class TokenScanner
def initialize(tokens)
@tokens = tokens
@index = 0
end
def current_token
@tokens[@index]
end
def current_type
current_token&.type
end
def previous_token
@tokens[@index - 1]
end
def next
token = current_token
@index += 1
return token
end
def consume(*token_types)
if token_types.include?(current_type)
return self.next
end
return nil
end
def consume!(*token_types)
consume(*token_types) || (raise "#{token_types} is expected but #{current_type}. #{current_token}")
end
def consume_multi(*token_types)
a = []
while token_types.include?(current_type)
a << self.next
end
raise "No token is consumed. #{token_types}" if a.empty?
return a
end
def eots?
current_token.nil?
end
end
end
end

View File

@ -1,3 +1,3 @@
module Lrama
VERSION = "0.5.6".freeze
VERSION = "0.5.7".freeze
end

View File

@ -0,0 +1,71 @@
<%# b4_shared_declarations -%>
<%-# b4_cpp_guard_open([b4_spec_mapped_header_file]) -%>
<%- if output.spec_mapped_header_file -%>
#ifndef <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
# define <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
<%- end -%>
<%-# b4_declare_yydebug & b4_YYDEBUG_define -%>
/* Debug traces. */
#ifndef YYDEBUG
# define YYDEBUG 0
#endif
#if YYDEBUG && !defined(yydebug)
extern int yydebug;
#endif
<%-# b4_percent_code_get([[requires]]). %code is not supported -%>
<%-# b4_token_enums_defines -%>
/* Token kinds. */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
enum yytokentype
{
<%= output.token_enums -%>
};
typedef enum yytokentype yytoken_kind_t;
#endif
<%-# b4_declare_yylstype -%>
<%-# b4_value_type_define -%>
/* Value type. */
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
union YYSTYPE
{
#line <%= output.grammar.union.lineno %> "<%= output.grammar_file_path %>"
<%= output.grammar.union.braces_less_code %>
#line [@oline@] [@ofile@]
};
typedef union YYSTYPE YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define YYSTYPE_IS_DECLARED 1
#endif
<%-# b4_location_type_define -%>
/* Location type. */
#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED
typedef struct YYLTYPE YYLTYPE;
struct YYLTYPE
{
int first_line;
int first_column;
int last_line;
int last_column;
};
# define YYLTYPE_IS_DECLARED 1
# define YYLTYPE_IS_TRIVIAL 1
#endif
<%-# b4_declare_yyerror_and_yylex. Not supported -%>
<%-# b4_declare_yyparse -%>
int yyparse (<%= output.parse_param %>);
<%-# b4_percent_code_get([[provides]]). %code is not supported -%>
<%-# b4_cpp_guard_close([b4_spec_mapped_header_file]) -%>
<%- if output.spec_mapped_header_file -%>
#endif /* !<%= output.b4_cpp_guard__b4_spec_mapped_header_file %> */
<%- end -%>

View File

@ -101,79 +101,13 @@
# endif
<%# b4_header_include_if -%>
<%- if output.include_header -%>
#include "<%= output.include_header %>"
<%- else -%>
/* Use api.header.include to #include this header
instead of duplicating it here. */
<%# b4_shared_declarations -%>
<%-# b4_cpp_guard_open([b4_spec_mapped_header_file]) -%>
<%- if output.spec_mapped_header_file -%>
#ifndef <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
# define <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
<%- end -%>
<%-# b4_declare_yydebug & b4_YYDEBUG_define -%>
/* Debug traces. */
#ifndef YYDEBUG
# define YYDEBUG 0
#endif
#if YYDEBUG && !defined(yydebug)
extern int yydebug;
#endif
<%-# b4_percent_code_get([[requires]]). %code is not supported -%>
<%-# b4_token_enums_defines -%>
/* Token kinds. */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
enum yytokentype
{
<%= output.token_enums -%>
};
typedef enum yytokentype yytoken_kind_t;
#endif
<%-# b4_declare_yylstype -%>
<%-# b4_value_type_define -%>
/* Value type. */
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
union YYSTYPE
{
#line <%= output.grammar.union.lineno %> "<%= output.grammar_file_path %>"
<%= output.grammar.union.braces_less_code %>
#line [@oline@] [@ofile@]
};
typedef union YYSTYPE YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define YYSTYPE_IS_DECLARED 1
#endif
<%-# b4_location_type_define -%>
/* Location type. */
#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED
typedef struct YYLTYPE YYLTYPE;
struct YYLTYPE
{
int first_line;
int first_column;
int last_line;
int last_column;
};
# define YYLTYPE_IS_DECLARED 1
# define YYLTYPE_IS_TRIVIAL 1
#endif
<%-# b4_declare_yyerror_and_yylex. Not supported -%>
<%-# b4_declare_yyparse -%>
int yyparse (<%= output.parse_param %>);
<%-# b4_percent_code_get([[provides]]). %code is not supported -%>
<%-# b4_cpp_guard_close([b4_spec_mapped_header_file]) -%>
<%- if output.spec_mapped_header_file -%>
#endif /* !<%= output.b4_cpp_guard__b4_spec_mapped_header_file %> */
<%- end -%>
<%= output.render_partial("bison/_yacc.h") %>
<%- end -%>
<%# b4_declare_symbol_enum -%>
/* Symbol kind. */
enum yysymbol_kind_t
@ -2114,3 +2048,4 @@ yyreturnlab:
#line <%= output.aux.epilogue_first_lineno - 1 %> "<%= output.grammar_file_path %>"
<%= output.aux.epilogue -%>

View File

@ -37,76 +37,4 @@
/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual,
especially those whose name start with YY_ or yy_. They are
private implementation details that can be changed or removed. */
<%# b4_shared_declarations -%>
<%# b4_shared_declarations -%>
<%-# b4_cpp_guard_open([b4_spec_mapped_header_file]) -%>
<%- if output.spec_mapped_header_file -%>
#ifndef <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
# define <%= output.b4_cpp_guard__b4_spec_mapped_header_file %>
<%- end -%>
<%-# b4_declare_yydebug & b4_YYDEBUG_define -%>
/* Debug traces. */
#ifndef YYDEBUG
# define YYDEBUG 0
#endif
#if YYDEBUG
extern int yydebug;
#endif
<%-# b4_percent_code_get([[requires]]). %code is not supported -%>
<%-# b4_token_enums_defines -%>
/* Token kinds. */
#ifndef YYTOKENTYPE
# define YYTOKENTYPE
enum yytokentype
{
<%= output.token_enums -%>
};
typedef enum yytokentype yytoken_kind_t;
#endif
<%-# b4_declare_yylstype -%>
<%-# b4_value_type_define -%>
/* Value type. */
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
union YYSTYPE
{
#line <%= output.grammar.union.lineno %> "<%= output.grammar_file_path %>"
<%= output.grammar.union.braces_less_code %>
#line [@oline@] [@ofile@]
};
typedef union YYSTYPE YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define YYSTYPE_IS_DECLARED 1
#endif
<%-# b4_location_type_define -%>
/* Location type. */
#if ! defined YYLTYPE && ! defined YYLTYPE_IS_DECLARED
typedef struct YYLTYPE YYLTYPE;
struct YYLTYPE
{
int first_line;
int first_column;
int last_line;
int last_column;
};
# define YYLTYPE_IS_DECLARED 1
# define YYLTYPE_IS_TRIVIAL 1
#endif
<%-# b4_declare_yyerror_and_yylex. Not supported -%>
<%-# b4_declare_yyparse -%>
int yyparse (<%= output.parse_param %>);
<%-# b4_percent_code_get([[provides]]). %code is not supported -%>
<%-# b4_cpp_guard_close([b4_spec_mapped_header_file]) -%>
<%- if output.spec_mapped_header_file -%>
#endif /* !<%= output.b4_cpp_guard__b4_spec_mapped_header_file %> */
<%- end -%>
<%= output.render_partial("bison/_yacc.h") %>