Lrama v0.5.9
This commit is contained in:
parent
368a1cb3c4
commit
a15aa259db
@ -8,7 +8,7 @@ module Lrama
|
||||
warning = Lrama::Warning.new
|
||||
text = options.y.read
|
||||
options.y.close if options.y != STDIN
|
||||
grammar = Lrama::Parser.new(text, options.grammar_file).parse
|
||||
grammar = Lrama::Parser.new(text, options.grammar_file, options.debug).parse
|
||||
states = Lrama::States.new(grammar, warning, trace_state: (options.trace_opts[:automaton] || options.trace_opts[:closure]))
|
||||
states.compute
|
||||
context = Lrama::Context.new(states)
|
||||
@ -20,6 +20,11 @@ module Lrama
|
||||
end
|
||||
end
|
||||
|
||||
if options.trace_opts && options.trace_opts[:rules]
|
||||
puts "Grammar rules:"
|
||||
puts grammar.rules
|
||||
end
|
||||
|
||||
File.open(options.outfile, "w+") do |f|
|
||||
Lrama::Output.new(
|
||||
out: f,
|
||||
|
@ -3,6 +3,7 @@ require "strscan"
|
||||
require "lrama/grammar/auxiliary"
|
||||
require "lrama/grammar/code"
|
||||
require "lrama/grammar/error_token"
|
||||
require "lrama/grammar/percent_code"
|
||||
require "lrama/grammar/precedence"
|
||||
require "lrama/grammar/printer"
|
||||
require "lrama/grammar/reference"
|
||||
@ -13,11 +14,9 @@ require "lrama/lexer"
|
||||
require "lrama/type"
|
||||
|
||||
module Lrama
|
||||
Token = Lrama::Lexer::Token
|
||||
|
||||
# Grammar is the result of parsing an input grammar file
|
||||
class Grammar
|
||||
attr_reader :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
|
||||
attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
|
||||
attr_accessor :union, :expect,
|
||||
:printers, :error_tokens,
|
||||
:lex_param, :parse_param, :initial_action,
|
||||
@ -26,6 +25,8 @@ module Lrama
|
||||
:sym_to_rules
|
||||
|
||||
def initialize
|
||||
# Code defined by "%code"
|
||||
@percent_codes = []
|
||||
@printers = []
|
||||
@error_tokens = []
|
||||
@symbols = []
|
||||
@ -43,6 +44,10 @@ module Lrama
|
||||
append_special_symbols
|
||||
end
|
||||
|
||||
# Records one "%code" declaration on this grammar.
#
# Wraps +id+ and +code+ in a PercentCode and appends it to @percent_codes.
# Both values are stored exactly as given.
#
# Returns @percent_codes (the result of Array#<<).
def add_percent_code(id:, code:)
  @percent_codes << PercentCode.new(id, code)
end
|
||||
|
||||
def add_printer(ident_or_tags:, code:, lineno:)
|
||||
@printers << Printer.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
|
||||
end
|
||||
@ -122,16 +127,7 @@ module Lrama
|
||||
@_rules << [lhs, rhs, lineno]
|
||||
end
|
||||
|
||||
def build_references(token_code)
|
||||
token_code.references.map! do |type, value, tag, first_column, last_column|
|
||||
Reference.new(type: type, value: value, ex_tag: tag, first_column: first_column, last_column: last_column)
|
||||
end
|
||||
|
||||
token_code
|
||||
end
|
||||
|
||||
def build_code(type, token_code)
|
||||
build_references(token_code)
|
||||
Code.new(type: type, token_code: token_code)
|
||||
end
|
||||
|
||||
@ -152,6 +148,7 @@ module Lrama
|
||||
end
|
||||
|
||||
def prepare
|
||||
extract_references
|
||||
normalize_rules
|
||||
collect_symbols
|
||||
replace_token_with_symbol
|
||||
@ -314,31 +311,33 @@ module Lrama
|
||||
# $ references
|
||||
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
||||
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
|
||||
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
||||
return [:dollar, "$", tag, start, scanner.pos - 1]
|
||||
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
||||
return Reference.new(type: :dollar, value: "$", ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
||||
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
|
||||
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
||||
return [:dollar, Integer(scanner[2]), tag, start, scanner.pos - 1]
|
||||
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
||||
return Reference.new(type: :dollar, value: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
||||
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
|
||||
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
||||
return [:dollar, scanner[2], tag, start, scanner.pos - 1]
|
||||
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
||||
return Reference.new(type: :dollar, value: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
||||
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
|
||||
tag = scanner[1] ? Lrama::Lexer::Token.new(type: Lrama::Lexer::Token::Tag, s_value: scanner[1]) : nil
|
||||
return [:dollar, scanner[2], tag, start, scanner.pos - 1]
|
||||
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
||||
return Reference.new(type: :dollar, value: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
|
||||
|
||||
# @ references
|
||||
# It need to wrap an identifier with brackets to use ".-" for identifiers
|
||||
when scanner.scan(/@\$/) # @$
|
||||
return [:at, "$", nil, start, scanner.pos - 1]
|
||||
return Reference.new(type: :at, value: "$", first_column: start, last_column: scanner.pos - 1)
|
||||
when scanner.scan(/@(\d+)/) # @1
|
||||
return [:at, Integer(scanner[1]), nil, start, scanner.pos - 1]
|
||||
return Reference.new(type: :at, value: Integer(scanner[1]), first_column: start, last_column: scanner.pos - 1)
|
||||
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
|
||||
return [:at, scanner[1], nil, start, scanner.pos - 1]
|
||||
return Reference.new(type: :at, value: scanner[1], first_column: start, last_column: scanner.pos - 1)
|
||||
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
|
||||
return [:at, scanner[1], nil, start, scanner.pos - 1]
|
||||
return Reference.new(type: :at, value: scanner[1], first_column: start, last_column: scanner.pos - 1)
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def extract_references
|
||||
unless initial_action.nil?
|
||||
scanner = StringScanner.new(initial_action.s_value)
|
||||
@ -353,7 +352,6 @@ module Lrama
|
||||
end
|
||||
|
||||
initial_action.token_code.references = references
|
||||
build_references(initial_action.token_code)
|
||||
end
|
||||
|
||||
@printers.each do |printer|
|
||||
@ -369,7 +367,6 @@ module Lrama
|
||||
end
|
||||
|
||||
printer.code.token_code.references = references
|
||||
build_references(printer.code.token_code)
|
||||
end
|
||||
|
||||
@error_tokens.each do |error_token|
|
||||
@ -385,12 +382,11 @@ module Lrama
|
||||
end
|
||||
|
||||
error_token.code.token_code.references = references
|
||||
build_references(error_token.code.token_code)
|
||||
end
|
||||
|
||||
@_rules.each do |lhs, rhs, _|
|
||||
rhs.each_with_index do |token, index|
|
||||
next if token.class == Lrama::Grammar::Symbol || token.type != Lrama::Lexer::Token::User_code
|
||||
next unless token.class == Lrama::Lexer::Token::UserCode
|
||||
|
||||
scanner = StringScanner.new(token.s_value)
|
||||
references = []
|
||||
@ -407,14 +403,11 @@ module Lrama
|
||||
end
|
||||
|
||||
token.references = references
|
||||
token.numberize_references(lhs, rhs)
|
||||
build_references(token)
|
||||
numberize_references(lhs, rhs, token.references)
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def find_nterm_by_id!(id)
|
||||
nterms.find do |nterm|
|
||||
nterm.id == id
|
||||
@ -428,29 +421,54 @@ module Lrama
|
||||
# @empty_symbol = term
|
||||
|
||||
# YYEOF
|
||||
term = add_term(id: Token.new(type: Token::Ident, s_value: "YYEOF"), alias_name: "\"end of file\"", token_id: 0)
|
||||
term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYEOF"), alias_name: "\"end of file\"", token_id: 0)
|
||||
term.number = 0
|
||||
term.eof_symbol = true
|
||||
@eof_symbol = term
|
||||
|
||||
# YYerror
|
||||
term = add_term(id: Token.new(type: Token::Ident, s_value: "YYerror"), alias_name: "error")
|
||||
term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYerror"), alias_name: "error")
|
||||
term.number = 1
|
||||
term.error_symbol = true
|
||||
@error_symbol = term
|
||||
|
||||
# YYUNDEF
|
||||
term = add_term(id: Token.new(type: Token::Ident, s_value: "YYUNDEF"), alias_name: "\"invalid token\"")
|
||||
term = add_term(id: Lrama::Lexer::Token::Ident.new(s_value: "YYUNDEF"), alias_name: "\"invalid token\"")
|
||||
term.number = 2
|
||||
term.undef_symbol = true
|
||||
@undef_symbol = term
|
||||
|
||||
# $accept
|
||||
term = add_nterm(id: Token.new(type: Token::Ident, s_value: "$accept"))
|
||||
term = add_nterm(id: Lrama::Lexer::Token::Ident.new(s_value: "$accept"))
|
||||
term.accept_symbol = true
|
||||
@accept_symbol = term
|
||||
end
|
||||
|
||||
# Rewrites named references in place so that each one refers to its target
# by position instead of by name.
#
# For every reference whose +value+ is a String other than '$':
# * if +lhs+ answers true to referred_by?(name), the value becomes '$';
# * otherwise the value becomes the 1-origin index of the first element of
#   +rhs+ that answers true to referred_by?(name).
#
# References whose value is not a String (e.g. an Integer) or is already '$'
# are left untouched.
#
# lhs        - object responding to referred_by?(String)
# rhs        - Array of objects responding to referred_by?(String)
# references - Array of objects with a readable/writable +value+
#
# Returns +references+ (the same Array, elements mutated in place).
# Raises RuntimeError when a name matches neither +lhs+ nor any +rhs+ element.
def numberize_references(lhs, rhs, references)
  references.each do |ref|
    ref_name = ref.value
    # ::String is spelled with a leading scope operator so the core class is
    # used even if an enclosing namespace defines its own String constant.
    next unless ref_name.is_a?(::String) && ref_name != '$'

    ref.value =
      if lhs.referred_by?(ref_name)
        '$'
      else
        index = rhs.find_index {|token| token.referred_by?(ref_name) }
        raise "'#{ref_name}' is invalid name." unless index

        # Positional references are 1-origin.
        index + 1
      end
  end
end
|
||||
|
||||
# 1. Add $accept rule to the top of rules
|
||||
# 2. Extract precedence and last action
|
||||
# 3. Extract action in the middle of RHS into new Empty rule
|
||||
@ -493,7 +511,7 @@ module Lrama
|
||||
case
|
||||
when r.is_a?(Symbol) # precedence_sym
|
||||
precedence_sym = r
|
||||
when (r.type == Token::User_code) && precedence_sym.nil? && code.nil? && rhs1.empty?
|
||||
when r.is_a?(Lrama::Lexer::Token::UserCode) && precedence_sym.nil? && code.nil? && rhs1.empty?
|
||||
code = r
|
||||
else
|
||||
rhs1 << r
|
||||
@ -503,7 +521,7 @@ module Lrama
|
||||
|
||||
# Bison n'th component is 1-origin
|
||||
(rhs1 + [code]).compact.each.with_index(1) do |token, i|
|
||||
if token.type == Token::User_code
|
||||
if token.is_a?(Lrama::Lexer::Token::UserCode)
|
||||
token.references.each do |ref|
|
||||
# Need to keep position_in_rhs for actions in the middle of RHS
|
||||
ref.position_in_rhs = i - 1
|
||||
@ -532,9 +550,9 @@ module Lrama
|
||||
end
|
||||
|
||||
rhs2 = rhs1.map do |token|
|
||||
if token.type == Token::User_code
|
||||
if token.is_a?(Lrama::Lexer::Token::UserCode)
|
||||
prefix = token.referred ? "@" : "$@"
|
||||
new_token = Token.new(type: Token::Ident, s_value: prefix + extracted_action_number.to_s)
|
||||
new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + extracted_action_number.to_s)
|
||||
extracted_action_number += 1
|
||||
a << [new_token, token]
|
||||
new_token
|
||||
@ -550,8 +568,12 @@ module Lrama
|
||||
end
|
||||
|
||||
c = code ? Code.new(type: :user_code, token_code: code) : nil
|
||||
@rules << Rule.new(id: @rules.count, lhs: lhs, rhs: rhs2, code: c, precedence_sym: precedence_sym, lineno: lineno)
|
||||
|
||||
# Expand Parameterizing rules
|
||||
if rhs2.any? {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) }
|
||||
expand_parameterizing_rules(lhs, rhs2, c, precedence_sym, lineno)
|
||||
else
|
||||
@rules << Rule.new(id: @rules.count, lhs: lhs, rhs: rhs2, code: c, precedence_sym: precedence_sym, lineno: lineno)
|
||||
end
|
||||
add_nterm(id: lhs)
|
||||
a.each do |new_token, _|
|
||||
add_nterm(id: new_token)
|
||||
@ -559,14 +581,37 @@ module Lrama
|
||||
end
|
||||
end
|
||||
|
||||
# Expands a rule that uses a parameterizing operator into plain rules.
#
# The symbol being parameterized is taken from rhs[0]; a new Ident named
# "<kind>_<rhs[0].s_value>" is registered through add_term and three rules
# are appended to @rules (X stands for rhs[0], G for the generated symbol):
#
#   option        ("?"):  lhs -> G    G -> (empty)    G -> X
#   nonempty_list ("+"):  lhs -> G    G -> X          G -> G X
#   list          ("*"):  lhs -> G    G -> (empty)    G -> G X
#
# When +rhs+ holds several Parameterizing tokens the kind is chosen with the
# priority option, then nonempty_list, then list. Every generated rule shares
# the given +code+, +precedence_sym+ and +lineno+. Elements of +rhs+ other
# than rhs[0] are not carried into the generated rules.
#
# Returns @rules after appending, or nil (appending nothing) when no
# Parameterizing token in +rhs+ matches one of the three kinds.
def expand_parameterizing_rules(lhs, rhs, code, precedence_sym, lineno)
  token = Lrama::Lexer::Token::Ident.new(s_value: rhs[0].s_value)
  operators = rhs.select {|r| r.is_a?(Lrama::Lexer::Token::Parameterizing) }

  kind =
    if operators.any?(&:option?)
      "option"
    elsif operators.any?(&:nonempty_list?)
      "nonempty_list"
    elsif operators.any?(&:list?)
      "list"
    end
  return nil unless kind

  generated = Lrama::Lexer::Token::Ident.new(s_value: "#{kind}_#{rhs[0].s_value}")
  add_term(id: generated)

  # Right-hand sides of the two rules that define the generated symbol.
  definitions =
    case kind
    when "option"        then [[], [token]]
    when "nonempty_list" then [[token], [generated, token]]
    else                      [[], [generated, token]]
    end

  # The original rule is rewritten to point at the generated symbol first,
  # followed by the generated symbol's own definitions.
  expanded = [[lhs, [generated]]] + definitions.map {|rule_rhs| [generated, rule_rhs] }
  expanded.each do |rule_lhs, rule_rhs|
    @rules << Rule.new(id: @rules.count, lhs: rule_lhs, rhs: rule_rhs, code: code, precedence_sym: precedence_sym, lineno: lineno)
  end

  @rules
end
|
||||
|
||||
# Collect symbols from rules
|
||||
def collect_symbols
|
||||
@rules.flat_map(&:rhs).each do |s|
|
||||
case s
|
||||
when Token
|
||||
if s.type == Token::Char
|
||||
add_term(id: s)
|
||||
end
|
||||
when Lrama::Lexer::Token::Char
|
||||
add_term(id: s)
|
||||
when Lrama::Lexer::Token
|
||||
# skip
|
||||
when Symbol
|
||||
# skip
|
||||
else
|
||||
@ -607,7 +652,7 @@ module Lrama
|
||||
|
||||
# If id is Token::Char, it uses ASCII code
|
||||
if sym.term? && sym.token_id.nil?
|
||||
if sym.id.type == Token::Char
|
||||
if sym.id.is_a?(Lrama::Lexer::Token::Char)
|
||||
# Ignore ' on the both sides
|
||||
case sym.id.s_value[1..-2]
|
||||
when "\\b"
|
||||
@ -660,7 +705,7 @@ module Lrama
|
||||
rule.code.references.each do |ref|
|
||||
next if ref.type == :at
|
||||
|
||||
if ref.referring_symbol.type != Token::User_code
|
||||
if !ref.referring_symbol.is_a?(Lrama::Lexer::Token::UserCode)
|
||||
ref.referring_symbol = token_to_symbol(ref.referring_symbol)
|
||||
end
|
||||
end
|
||||
@ -670,7 +715,7 @@ module Lrama
|
||||
|
||||
def token_to_symbol(token)
|
||||
case token
|
||||
when Token
|
||||
when Lrama::Lexer::Token
|
||||
find_symbol_by_id!(token)
|
||||
when Symbol
|
||||
token
|
||||
@ -716,10 +761,10 @@ module Lrama
|
||||
@symbols.each do |sym|
|
||||
@printers.each do |printer|
|
||||
printer.ident_or_tags.each do |ident_or_tag|
|
||||
case ident_or_tag.type
|
||||
when Token::Ident
|
||||
case ident_or_tag
|
||||
when Lrama::Lexer::Token::Ident
|
||||
sym.printer = printer if sym.id == ident_or_tag
|
||||
when Token::Tag
|
||||
when Lrama::Lexer::Token::Tag
|
||||
sym.printer = printer if sym.tag == ident_or_tag
|
||||
else
|
||||
raise "Unknown token type. #{printer}"
|
||||
@ -733,10 +778,10 @@ module Lrama
|
||||
@symbols.each do |sym|
|
||||
@error_tokens.each do |error_token|
|
||||
error_token.ident_or_tags.each do |ident_or_tag|
|
||||
case ident_or_tag.type
|
||||
when Token::Ident
|
||||
case ident_or_tag
|
||||
when Lrama::Lexer::Token::Ident
|
||||
sym.error_token = error_token if sym.id == ident_or_tag
|
||||
when Token::Tag
|
||||
when Lrama::Lexer::Token::Tag
|
||||
sym.error_token = error_token if sym.tag == ident_or_tag
|
||||
else
|
||||
raise "Unknown token type. #{error_token}"
|
||||
|
12
tool/lrama/lib/lrama/grammar/percent_code.rb
Normal file
12
tool/lrama/lib/lrama/grammar/percent_code.rb
Normal file
@ -0,0 +1,12 @@
|
||||
module Lrama
  class Grammar
    # One "%code" declaration: pairs the declaration's identifier with its
    # code. Both values are stored exactly as passed in and are exposed
    # read-only.
    class PercentCode
      attr_reader :id, :code

      # id   - identifier of the %code declaration
      # code - the code attached to that identifier
      def initialize(id, code)
        @id = id
        @code = code
      end
    end
  end
end
|
@ -47,9 +47,9 @@ module Lrama
|
||||
name = "YYACCEPT"
|
||||
when eof_symbol?
|
||||
name = "YYEOF"
|
||||
when term? && id.type == Token::Char
|
||||
when term? && id.is_a?(Lrama::Lexer::Token::Char)
|
||||
name = number.to_s + display_name
|
||||
when term? && id.type == Token::Ident
|
||||
when term? && id.is_a?(Lrama::Lexer::Token::Ident)
|
||||
name = id.s_value
|
||||
when nterm? && (id.s_value.include?("$") || id.s_value.include?("@"))
|
||||
name = number.to_s + id.s_value
|
||||
|
@ -3,6 +3,7 @@ require "lrama/lexer/token"
|
||||
|
||||
module Lrama
|
||||
class Lexer
|
||||
attr_reader :head_line, :head_column
|
||||
attr_accessor :status
|
||||
attr_accessor :end_symbol
|
||||
|
||||
@ -24,6 +25,8 @@ module Lrama
|
||||
%precedence
|
||||
%prec
|
||||
%error-token
|
||||
%empty
|
||||
%code
|
||||
)
|
||||
|
||||
def initialize(text)
|
||||
@ -63,8 +66,6 @@ module Lrama
|
||||
when @scanner.scan(/\/\//)
|
||||
@scanner.scan_until(/\n/)
|
||||
newline
|
||||
when @scanner.scan(/%empty/)
|
||||
# noop
|
||||
else
|
||||
break
|
||||
end
|
||||
@ -80,18 +81,20 @@ module Lrama
|
||||
return [@scanner.matched, @scanner.matched]
|
||||
when @scanner.scan(/#{PERCENT_TOKENS.join('|')}/)
|
||||
return [@scanner.matched, @scanner.matched]
|
||||
when @scanner.scan(/[\?\+\*]/)
|
||||
return [@scanner.matched, @scanner.matched]
|
||||
when @scanner.scan(/<\w+>/)
|
||||
return [:TAG, build_token(type: Token::Tag, s_value: @scanner.matched)]
|
||||
return [:TAG, setup_token(Lrama::Lexer::Token::Tag.new(s_value: @scanner.matched))]
|
||||
when @scanner.scan(/'.'/)
|
||||
return [:CHARACTER, build_token(type: Token::Char, s_value: @scanner.matched)]
|
||||
return [:CHARACTER, setup_token(Lrama::Lexer::Token::Char.new(s_value: @scanner.matched))]
|
||||
when @scanner.scan(/'\\\\'|'\\b'|'\\t'|'\\f'|'\\r'|'\\n'|'\\v'|'\\13'/)
|
||||
return [:CHARACTER, build_token(type: Token::Char, s_value: @scanner.matched)]
|
||||
return [:CHARACTER, setup_token(Lrama::Lexer::Token::Char.new(s_value: @scanner.matched))]
|
||||
when @scanner.scan(/"/)
|
||||
return [:STRING, %Q("#{@scanner.scan_until(/"/)})]
|
||||
when @scanner.scan(/\d+/)
|
||||
return [:INTEGER, Integer(@scanner.matched)]
|
||||
when @scanner.scan(/([a-zA-Z_.][-a-zA-Z0-9_.]*)/)
|
||||
token = build_token(type: Token::Ident, s_value: @scanner.matched)
|
||||
token = setup_token(Lrama::Lexer::Token::Ident.new(s_value: @scanner.matched))
|
||||
type =
|
||||
if @scanner.check(/\s*(\[\s*[a-zA-Z_.][-a-zA-Z0-9_.]*\s*\])?\s*:/)
|
||||
:IDENT_COLON
|
||||
@ -100,7 +103,7 @@ module Lrama
|
||||
end
|
||||
return [type, token]
|
||||
else
|
||||
raise
|
||||
raise ParseError, "Unexpected token: #{@scanner.peek(10).chomp}."
|
||||
end
|
||||
end
|
||||
|
||||
@ -115,13 +118,13 @@ module Lrama
|
||||
when @scanner.scan(/}/)
|
||||
if nested == 0 && @end_symbol == '}'
|
||||
@scanner.unscan
|
||||
return [:C_DECLARATION, build_token(type: Token::User_code, s_value: code, references: [])]
|
||||
return [:C_DECLARATION, setup_token(Lrama::Lexer::Token::UserCode.new(s_value: code))]
|
||||
else
|
||||
code += @scanner.matched
|
||||
nested -= 1
|
||||
end
|
||||
when @scanner.check(/#{@end_symbol}/)
|
||||
return [:C_DECLARATION, build_token(type: Token::User_code, s_value: code, references: [])]
|
||||
return [:C_DECLARATION, setup_token(Lrama::Lexer::Token::UserCode.new(s_value: code))]
|
||||
when @scanner.scan(/\n/)
|
||||
code += @scanner.matched
|
||||
newline
|
||||
@ -136,7 +139,7 @@ module Lrama
|
||||
code += @scanner.getch
|
||||
end
|
||||
end
|
||||
raise
|
||||
raise ParseError, "Unexpected code: #{code}."
|
||||
end
|
||||
|
||||
private
|
||||
@ -155,13 +158,9 @@ module Lrama
|
||||
end
|
||||
end
|
||||
|
||||
def build_token(type:, s_value:, **options)
|
||||
token = Token.new(type: type, s_value: s_value)
|
||||
def setup_token(token)
|
||||
token.line = @head_line
|
||||
token.column = @head_column
|
||||
options.each do |attr, value|
|
||||
token.public_send("#{attr}=", value)
|
||||
end
|
||||
|
||||
token
|
||||
end
|
||||
|
@ -1,84 +1,26 @@
|
||||
require 'lrama/lexer/token/type'
|
||||
|
||||
module Lrama
|
||||
class Lexer
|
||||
class Token
|
||||
class Token < Struct.new(:s_value, :alias_name, keyword_init: true)
|
||||
|
||||
attr_accessor :line, :column, :referred
|
||||
# For User_code
|
||||
attr_accessor :references
|
||||
|
||||
def to_s
|
||||
"#{super} line: #{line}, column: #{column}"
|
||||
end
|
||||
|
||||
def referred_by?(string)
|
||||
[self.s_value, self.alias].include?(string)
|
||||
[self.s_value, self.alias_name].include?(string)
|
||||
end
|
||||
|
||||
def ==(other)
|
||||
self.class == other.class && self.type == other.type && self.s_value == other.s_value
|
||||
self.class == other.class && self.s_value == other.s_value
|
||||
end
|
||||
|
||||
def numberize_references(lhs, rhs)
|
||||
self.references.map! {|ref|
|
||||
ref_name = ref[1]
|
||||
if ref_name.is_a?(::String) && ref_name != '$'
|
||||
value =
|
||||
if lhs.referred_by?(ref_name)
|
||||
'$'
|
||||
else
|
||||
index = rhs.find_index {|token| token.referred_by?(ref_name) }
|
||||
|
||||
if index
|
||||
index + 1
|
||||
else
|
||||
raise "'#{ref_name}' is invalid name."
|
||||
end
|
||||
end
|
||||
[ref[0], value, ref[2], ref[3], ref[4]]
|
||||
else
|
||||
ref
|
||||
end
|
||||
}
|
||||
end
|
||||
|
||||
@i = 0
|
||||
@types = []
|
||||
|
||||
def self.define_type(name)
|
||||
type = Type.new(id: @i, name: name.to_s)
|
||||
const_set(name, type)
|
||||
@types << type
|
||||
@i += 1
|
||||
end
|
||||
|
||||
# Token types
|
||||
define_type(:P_expect) # %expect
|
||||
define_type(:P_define) # %define
|
||||
define_type(:P_printer) # %printer
|
||||
define_type(:P_error_token) # %error-token
|
||||
define_type(:P_lex_param) # %lex-param
|
||||
define_type(:P_parse_param) # %parse-param
|
||||
define_type(:P_initial_action) # %initial-action
|
||||
define_type(:P_union) # %union
|
||||
define_type(:P_token) # %token
|
||||
define_type(:P_type) # %type
|
||||
define_type(:P_nonassoc) # %nonassoc
|
||||
define_type(:P_left) # %left
|
||||
define_type(:P_right) # %right
|
||||
define_type(:P_precedence) # %precedence
|
||||
define_type(:P_prec) # %prec
|
||||
define_type(:User_code) # { ... }
|
||||
define_type(:Tag) # <int>
|
||||
define_type(:Number) # 0
|
||||
define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there)
|
||||
define_type(:Ident) # api.pure, tNUMBER
|
||||
define_type(:Named_Ref) # [foo]
|
||||
define_type(:Semicolon) # ;
|
||||
define_type(:Bar) # |
|
||||
define_type(:String) # "str"
|
||||
define_type(:Char) # '+'
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
require 'lrama/lexer/token/char'
|
||||
require 'lrama/lexer/token/ident'
|
||||
require 'lrama/lexer/token/parameterizing'
|
||||
require 'lrama/lexer/token/tag'
|
||||
require 'lrama/lexer/token/user_code'
|
||||
|
8
tool/lrama/lib/lrama/lexer/token/char.rb
Normal file
8
tool/lrama/lib/lrama/lexer/token/char.rb
Normal file
@ -0,0 +1,8 @@
|
||||
module Lrama
  class Lexer
    class Token
      # Token for a character literal such as '+'.
      # Adds no behaviour of its own; the subclass exists so callers can
      # distinguish the token kind by class.
      class Char < Token
      end
    end
  end
end
|
8
tool/lrama/lib/lrama/lexer/token/ident.rb
Normal file
8
tool/lrama/lib/lrama/lexer/token/ident.rb
Normal file
@ -0,0 +1,8 @@
|
||||
module Lrama
  class Lexer
    class Token
      # Token for an identifier such as api.pure or tNUMBER.
      # Adds no behaviour of its own; the subclass exists so callers can
      # distinguish the token kind by class.
      class Ident < Token
      end
    end
  end
end
|
19
tool/lrama/lib/lrama/lexer/token/parameterizing.rb
Normal file
19
tool/lrama/lib/lrama/lexer/token/parameterizing.rb
Normal file
@ -0,0 +1,19 @@
|
||||
module Lrama
  class Lexer
    class Token
      # Token for a parameterizing operator. The predicates below classify
      # the operator purely by its s_value.
      class Parameterizing < Token
        # True when the operator is "?".
        def option?
          s_value == "?"
        end

        # True when the operator is "+".
        def nonempty_list?
          s_value == "+"
        end

        # True when the operator is "*".
        def list?
          s_value == "*"
        end
      end
    end
  end
end
|
8
tool/lrama/lib/lrama/lexer/token/tag.rb
Normal file
8
tool/lrama/lib/lrama/lexer/token/tag.rb
Normal file
@ -0,0 +1,8 @@
|
||||
module Lrama
  class Lexer
    class Token
      # Token for a type tag written in angle brackets, such as <int>.
      # Adds no behaviour of its own; the subclass exists so callers can
      # distinguish the token kind by class.
      class Tag < Token
      end
    end
  end
end
|
@ -1,8 +0,0 @@
|
||||
module Lrama
|
||||
class Lexer
|
||||
class Token < Struct.new(:type, :s_value, :alias, keyword_init: true)
|
||||
class Type < Struct.new(:id, :name, keyword_init: true)
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
14
tool/lrama/lib/lrama/lexer/token/user_code.rb
Normal file
14
tool/lrama/lib/lrama/lexer/token/user_code.rb
Normal file
@ -0,0 +1,14 @@
|
||||
module Lrama
  class Lexer
    class Token
      # Token holding a chunk of user-written code, plus the list of
      # references found inside that code.
      class UserCode < Token
        # Array of references; starts empty and is filled in by callers.
        attr_accessor :references

        # Accepts the same keywords as Token and forwards them unchanged
        # (bare +super+ re-passes s_value: and alias_name:), then gives each
        # instance its own empty references Array.
        def initialize(s_value: nil, alias_name: nil)
          super
          self.references = []
        end
      end
    end
  end
end
|
@ -58,6 +58,7 @@ module Lrama
|
||||
o.separator 'Tuning the Parser:'
|
||||
o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v }
|
||||
o.on('-t', 'reserved, do nothing') { }
|
||||
o.on('--debug', 'display debugging outputs of internal parser') {|v| @options.debug = true }
|
||||
o.separator ''
|
||||
o.separator 'Output:'
|
||||
o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v }
|
||||
@ -108,7 +109,7 @@ module Lrama
|
||||
def validate_trace(trace)
|
||||
list = %w[
|
||||
none locations scan parse automaton bitsets
|
||||
closure grammar resource sets muscles tools
|
||||
closure grammar rules resource sets muscles tools
|
||||
m4-early m4 skeleton time ielr cex all
|
||||
]
|
||||
h = {}
|
||||
|
@ -4,7 +4,8 @@ module Lrama
|
||||
attr_accessor :skeleton, :header, :header_file,
|
||||
:report_file, :outfile,
|
||||
:error_recovery, :grammar_file,
|
||||
:report_file, :trace_opts, :report_opts, :y
|
||||
:report_file, :trace_opts, :report_opts, :y,
|
||||
:debug
|
||||
|
||||
def initialize
|
||||
@skeleton = "bison/yacc.c"
|
||||
|
@ -349,6 +349,15 @@ module Lrama
|
||||
end
|
||||
end
|
||||
|
||||
# b4_percent_code_get
#
# Returns the code of every "%code" declaration of the grammar whose
# identifier's s_value equals +name+, concatenated in declaration order
# with no separator. Returns "" when no declaration matches.
def percent_code(name)
  matching = @grammar.percent_codes.select {|pc| pc.id.s_value == name }
  matching.map {|pc| pc.code.s_value }.join
end
|
||||
|
||||
private
|
||||
|
||||
def eval_template(file, path)
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,3 @@
|
||||
module Lrama
|
||||
VERSION = "0.5.8".freeze
|
||||
VERSION = "0.5.9".freeze
|
||||
end
|
||||
|
@ -12,7 +12,7 @@
|
||||
#if YYDEBUG && !defined(yydebug)
|
||||
extern int yydebug;
|
||||
#endif
|
||||
<%-# b4_percent_code_get([[requires]]). %code is not supported -%>
|
||||
<%= output.percent_code("requires") %>
|
||||
|
||||
<%-# b4_token_enums_defines -%>
|
||||
/* Token kinds. */
|
||||
@ -64,7 +64,7 @@ struct YYLTYPE
|
||||
int yyparse (<%= output.parse_param %>);
|
||||
|
||||
|
||||
<%-# b4_percent_code_get([[provides]]). %code is not supported -%>
|
||||
<%= output.percent_code("provides") %>
|
||||
<%-# b4_cpp_guard_close([b4_spec_mapped_header_file]) -%>
|
||||
<%- if output.spec_mapped_header_file -%>
|
||||
#endif /* !<%= output.b4_cpp_guard__b4_spec_mapped_header_file %> */
|
||||
|
@ -68,8 +68,6 @@
|
||||
#define YYPULL 1
|
||||
|
||||
|
||||
|
||||
|
||||
<%# b4_user_pre_prologue -%>
|
||||
/* First part of user prologue. */
|
||||
#line <%= output.aux.prologue_first_lineno %> "<%= output.grammar_file_path %>"
|
||||
|
Loading…
x
Reference in New Issue
Block a user