Lrama v0.6.3
This commit is contained in:
parent
95ec71efc5
commit
06ad00adc2
@ -1,5 +1,39 @@
|
||||
# NEWS for Lrama
|
||||
|
||||
## Lrama 0.6.3 (2024-02-15)
|
||||
|
||||
### Bring Your Own Stack
|
||||
|
||||
Provide functionalities for Bring Your Own Stack.
|
||||
|
||||
Ruby’s Ripper library requires its own semantic value stack to manage the Ruby objects returned by user-defined callback methods. Currently Ripper uses the semantic value stack (`yyvsa`) that the parser uses to manage Nodes. This hack imposes some limitations on Ripper. For example, Ripper cannot execute semantic analysis that depends on the Node structure.
|
||||
|
||||
Lrama introduces two features to support another semantic value stack by parser generator users.
|
||||
|
||||
1. Callback entry points
|
||||
|
||||
Users can emulate a semantic value stack with these callbacks.
|
||||
Lrama provides these five callbacks. A registered function is called each time its event happens. For example, the `%after-shift` function is called when a shift happens on the original semantic value stack.
|
||||
|
||||
* `%after-shift` function_name
|
||||
* `%before-reduce` function_name
|
||||
* `%after-reduce` function_name
|
||||
* `%after-shift-error-token` function_name
|
||||
* `%after-pop-stack` function_name
|
||||
|
||||
2. `$:n` variable to access the index of each grammar symbol
|
||||
|
||||
Users also need to access the semantic values on their own stack from grammar actions. `$:n` provides a way to do so: `$:n` is translated to a negative index counted from the top of the stack.
|
||||
For example
|
||||
|
||||
```
|
||||
primary: k_if expr_value then compstmt if_tail k_end
|
||||
{
|
||||
/*% ripper: if!($:2, $:4, $:5) %*/
|
||||
/* $:2 = -5, $:4 = -3, $:5 = -2. */
|
||||
}
|
||||
```
|
||||
|
||||
## Lrama 0.6.2 (2024-01-27)
|
||||
|
||||
### %no-stdlib directive
|
||||
|
@ -265,9 +265,9 @@ module Lrama
|
||||
|
||||
s = actions.each_with_index.map do |n, i|
|
||||
[i, n]
|
||||
end.select do |i, n|
|
||||
end.reject do |i, n|
|
||||
# Remove default_reduction_rule entries
|
||||
n != 0
|
||||
n == 0
|
||||
end
|
||||
|
||||
if s.count != 0
|
||||
@ -462,7 +462,7 @@ module Lrama
|
||||
@yylast = high
|
||||
|
||||
# replace_ninf
|
||||
@yypact_ninf = (@base.select {|i| i != BaseMin } + [0]).min - 1
|
||||
@yypact_ninf = (@base.reject {|i| i == BaseMin } + [0]).min - 1
|
||||
@base.map! do |i|
|
||||
case i
|
||||
when BaseMin
|
||||
@ -472,7 +472,7 @@ module Lrama
|
||||
end
|
||||
end
|
||||
|
||||
@yytable_ninf = (@table.compact.select {|i| i != ErrorActionNumber } + [0]).min - 1
|
||||
@yytable_ninf = (@table.compact.reject {|i| i == ErrorActionNumber } + [0]).min - 1
|
||||
@table.map! do |i|
|
||||
case i
|
||||
when nil
|
||||
|
@ -1,16 +1,18 @@
|
||||
require "forwardable"
|
||||
require "lrama/grammar/auxiliary"
|
||||
require "lrama/grammar/binding"
|
||||
require "lrama/grammar/code"
|
||||
require "lrama/grammar/counter"
|
||||
require "lrama/grammar/error_token"
|
||||
require "lrama/grammar/parameterizing_rule"
|
||||
require "lrama/grammar/percent_code"
|
||||
require "lrama/grammar/precedence"
|
||||
require "lrama/grammar/printer"
|
||||
require "lrama/grammar/reference"
|
||||
require "lrama/grammar/rule"
|
||||
require "lrama/grammar/rule_builder"
|
||||
require "lrama/grammar/parameterizing_rule"
|
||||
require "lrama/grammar/symbol"
|
||||
require "lrama/grammar/symbols"
|
||||
require "lrama/grammar/type"
|
||||
require "lrama/grammar/union"
|
||||
require "lrama/lexer"
|
||||
@ -18,14 +20,23 @@ require "lrama/lexer"
|
||||
module Lrama
|
||||
# Grammar is the result of parsing an input grammar file
|
||||
class Grammar
|
||||
extend Forwardable
|
||||
|
||||
attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
|
||||
attr_accessor :union, :expect,
|
||||
:printers, :error_tokens,
|
||||
:lex_param, :parse_param, :initial_action,
|
||||
:symbols, :types,
|
||||
:after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack,
|
||||
:symbols_resolver, :types,
|
||||
:rules, :rule_builders,
|
||||
:sym_to_rules, :no_stdlib
|
||||
|
||||
def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term,
|
||||
:find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol,
|
||||
:find_symbol_by_s_value!, :fill_symbol_number, :fill_nterm_type,
|
||||
:fill_printer, :fill_error_token, :sort_by_number!
|
||||
|
||||
|
||||
def initialize(rule_counter)
|
||||
@rule_counter = rule_counter
|
||||
|
||||
@ -33,7 +44,7 @@ module Lrama
|
||||
@percent_codes = []
|
||||
@printers = []
|
||||
@error_tokens = []
|
||||
@symbols = []
|
||||
@symbols_resolver = Grammar::Symbols::Resolver.new
|
||||
@types = []
|
||||
@rule_builders = []
|
||||
@rules = []
|
||||
@ -62,44 +73,6 @@ module Lrama
|
||||
@error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
|
||||
end
|
||||
|
||||
def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
|
||||
if token_id && (sym = @symbols.find {|s| s.token_id == token_id })
|
||||
if replace
|
||||
sym.id = id
|
||||
sym.alias_name = alias_name
|
||||
sym.tag = tag
|
||||
end
|
||||
|
||||
return sym
|
||||
end
|
||||
|
||||
if (sym = @symbols.find {|s| s.id == id })
|
||||
return sym
|
||||
end
|
||||
|
||||
sym = Symbol.new(
|
||||
id: id, alias_name: alias_name, number: nil, tag: tag,
|
||||
term: true, token_id: token_id, nullable: false
|
||||
)
|
||||
@symbols << sym
|
||||
@terms = nil
|
||||
|
||||
return sym
|
||||
end
|
||||
|
||||
def add_nterm(id:, alias_name: nil, tag: nil)
|
||||
return if @symbols.find {|s| s.id == id }
|
||||
|
||||
sym = Symbol.new(
|
||||
id: id, alias_name: alias_name, number: nil, tag: tag,
|
||||
term: false, token_id: nil, nullable: nil,
|
||||
)
|
||||
@symbols << sym
|
||||
@nterms = nil
|
||||
|
||||
return sym
|
||||
end
|
||||
|
||||
def add_type(id:, tag:)
|
||||
@types << Type.new(id: id, tag: tag)
|
||||
end
|
||||
@ -165,13 +138,9 @@ module Lrama
|
||||
normalize_rules
|
||||
collect_symbols
|
||||
set_lhs_and_rhs
|
||||
fill_symbol_number
|
||||
fill_default_precedence
|
||||
fill_symbols
|
||||
fill_sym_to_rules
|
||||
fill_nterm_type
|
||||
fill_symbol_printer
|
||||
fill_symbol_error_token
|
||||
@symbols.sort_by!(&:number)
|
||||
compute_nullable
|
||||
compute_first_set
|
||||
end
|
||||
@ -180,40 +149,10 @@ module Lrama
|
||||
#
|
||||
# * Validation for no_declared_type_reference
|
||||
def validate!
|
||||
validate_symbol_number_uniqueness!
|
||||
validate_symbol_alias_name_uniqueness!
|
||||
@symbols_resolver.validate!
|
||||
validate_rule_lhs_is_nterm!
|
||||
end
|
||||
|
||||
def find_symbol_by_s_value(s_value)
|
||||
@symbols.find do |sym|
|
||||
sym.id.s_value == s_value
|
||||
end
|
||||
end
|
||||
|
||||
def find_symbol_by_s_value!(s_value)
|
||||
find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
|
||||
end
|
||||
|
||||
def find_symbol_by_id(id)
|
||||
@symbols.find do |sym|
|
||||
sym.id == id || sym.alias_name == id.s_value
|
||||
end
|
||||
end
|
||||
|
||||
def find_symbol_by_id!(id)
|
||||
find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
|
||||
end
|
||||
|
||||
def find_symbol_by_number!(number)
|
||||
sym = @symbols[number]
|
||||
|
||||
raise "Symbol not found: #{number}" unless sym
|
||||
raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number
|
||||
|
||||
sym
|
||||
end
|
||||
|
||||
def find_rules_by_symbol!(sym)
|
||||
find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found")
|
||||
end
|
||||
@ -222,22 +161,6 @@ module Lrama
|
||||
@sym_to_rules[sym.number]
|
||||
end
|
||||
|
||||
def terms_count
|
||||
terms.count
|
||||
end
|
||||
|
||||
def terms
|
||||
@terms ||= @symbols.select(&:term?)
|
||||
end
|
||||
|
||||
def nterms_count
|
||||
nterms.count
|
||||
end
|
||||
|
||||
def nterms
|
||||
@nterms ||= @symbols.select(&:nterm?)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def compute_nullable
|
||||
@ -284,7 +207,7 @@ module Lrama
|
||||
rule.nullable = false
|
||||
end
|
||||
|
||||
nterms.select {|r| r.nullable.nil? }.each do |nterm|
|
||||
nterms.select {|e| e.nullable.nil? }.each do |nterm|
|
||||
nterm.nullable = false
|
||||
end
|
||||
end
|
||||
@ -330,12 +253,6 @@ module Lrama
|
||||
end
|
||||
end
|
||||
|
||||
def find_nterm_by_id!(id)
|
||||
nterms.find do |nterm|
|
||||
nterm.id == id
|
||||
end || (raise "Nterm not found: #{id}")
|
||||
end
|
||||
|
||||
def append_special_symbols
|
||||
# YYEMPTY (token_id: -2, number: -2) is added when a template is evaluated
|
||||
# term = add_term(id: Token.new(Token::Ident, "YYEMPTY"), token_id: -2)
|
||||
@ -397,79 +314,6 @@ module Lrama
|
||||
end
|
||||
end
|
||||
|
||||
# Fill #number and #token_id
|
||||
def fill_symbol_number
|
||||
# Character literal in grammar file has
|
||||
# token id corresponding to ASCII code by default,
|
||||
# so start token_id from 256.
|
||||
token_id = 256
|
||||
|
||||
# YYEMPTY = -2
|
||||
# YYEOF = 0
|
||||
# YYerror = 1
|
||||
# YYUNDEF = 2
|
||||
number = 3
|
||||
|
||||
nterm_token_id = 0
|
||||
used_numbers = {}
|
||||
|
||||
@symbols.map(&:number).each do |n|
|
||||
used_numbers[n] = true
|
||||
end
|
||||
|
||||
(@symbols.select(&:term?) + @symbols.select(&:nterm?)).each do |sym|
|
||||
while used_numbers[number] do
|
||||
number += 1
|
||||
end
|
||||
|
||||
if sym.number.nil?
|
||||
sym.number = number
|
||||
number += 1
|
||||
end
|
||||
|
||||
# If id is Token::Char, it uses ASCII code
|
||||
if sym.term? && sym.token_id.nil?
|
||||
if sym.id.is_a?(Lrama::Lexer::Token::Char)
|
||||
# Ignore ' on the both sides
|
||||
case sym.id.s_value[1..-2]
|
||||
when "\\b"
|
||||
sym.token_id = 8
|
||||
when "\\f"
|
||||
sym.token_id = 12
|
||||
when "\\n"
|
||||
sym.token_id = 10
|
||||
when "\\r"
|
||||
sym.token_id = 13
|
||||
when "\\t"
|
||||
sym.token_id = 9
|
||||
when "\\v"
|
||||
sym.token_id = 11
|
||||
when "\""
|
||||
sym.token_id = 34
|
||||
when "'"
|
||||
sym.token_id = 39
|
||||
when "\\\\"
|
||||
sym.token_id = 92
|
||||
when /\A\\(\d+)\z/
|
||||
sym.token_id = Integer($1, 8)
|
||||
when /\A(.)\z/
|
||||
sym.token_id = $1.bytes.first
|
||||
else
|
||||
raise "Unknown Char s_value #{sym}"
|
||||
end
|
||||
else
|
||||
sym.token_id = token_id
|
||||
token_id += 1
|
||||
end
|
||||
end
|
||||
|
||||
if sym.nterm? && sym.token_id.nil?
|
||||
sym.token_id = nterm_token_id
|
||||
nterm_token_id += 1
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def set_lhs_and_rhs
|
||||
@rules.each do |rule|
|
||||
rule.lhs = token_to_symbol(rule._lhs) if rule._lhs
|
||||
@ -480,15 +324,6 @@ module Lrama
|
||||
end
|
||||
end
|
||||
|
||||
def token_to_symbol(token)
|
||||
case token
|
||||
when Lrama::Lexer::Token
|
||||
find_symbol_by_id!(token)
|
||||
else
|
||||
raise "Unknown class: #{token}"
|
||||
end
|
||||
end
|
||||
|
||||
# Rule inherits precedence from the last term in RHS.
|
||||
#
|
||||
# https://www.gnu.org/software/bison/manual/html_node/How-Precedence.html
|
||||
@ -506,6 +341,14 @@ module Lrama
|
||||
end
|
||||
end
|
||||
|
||||
# Runs the symbol post-processing steps in a fixed order: number/token-id
# assignment, %type tags (from @types), printers (from @printers), error
# tokens (from @error_tokens) and finally sorting by symbol number.
# Every call here is one of the names forwarded to @symbols_resolver by the
# def_delegators declaration of this class.
def fill_symbols
|
||||
fill_symbol_number
|
||||
fill_nterm_type(@types)
|
||||
fill_printer(@printers)
|
||||
fill_error_token(@error_tokens)
|
||||
sort_by_number!
|
||||
end
|
||||
|
||||
def fill_sym_to_rules
|
||||
@rules.each do |rule|
|
||||
key = rule.lhs.number
|
||||
@ -514,68 +357,6 @@ module Lrama
|
||||
end
|
||||
end
|
||||
|
||||
# Fill nterm's tag defined by %type decl
|
||||
def fill_nterm_type
|
||||
@types.each do |type|
|
||||
nterm = find_nterm_by_id!(type.id)
|
||||
nterm.tag = type.tag
|
||||
end
|
||||
end
|
||||
|
||||
def fill_symbol_printer
|
||||
@symbols.each do |sym|
|
||||
@printers.each do |printer|
|
||||
printer.ident_or_tags.each do |ident_or_tag|
|
||||
case ident_or_tag
|
||||
when Lrama::Lexer::Token::Ident
|
||||
sym.printer = printer if sym.id == ident_or_tag
|
||||
when Lrama::Lexer::Token::Tag
|
||||
sym.printer = printer if sym.tag == ident_or_tag
|
||||
else
|
||||
raise "Unknown token type. #{printer}"
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def fill_symbol_error_token
|
||||
@symbols.each do |sym|
|
||||
@error_tokens.each do |error_token|
|
||||
error_token.ident_or_tags.each do |ident_or_tag|
|
||||
case ident_or_tag
|
||||
when Lrama::Lexer::Token::Ident
|
||||
sym.error_token = error_token if sym.id == ident_or_tag
|
||||
when Lrama::Lexer::Token::Tag
|
||||
sym.error_token = error_token if sym.tag == ident_or_tag
|
||||
else
|
||||
raise "Unknown token type. #{error_token}"
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
def validate_symbol_number_uniqueness!
|
||||
invalid = @symbols.group_by(&:number).select do |number, syms|
|
||||
syms.count > 1
|
||||
end
|
||||
|
||||
return if invalid.empty?
|
||||
|
||||
raise "Symbol number is duplicated. #{invalid}"
|
||||
end
|
||||
|
||||
def validate_symbol_alias_name_uniqueness!
|
||||
invalid = @symbols.select(&:alias_name).group_by(&:alias_name).select do |alias_name, syms|
|
||||
syms.count > 1
|
||||
end
|
||||
|
||||
return if invalid.empty?
|
||||
|
||||
raise "Symbol alias name is duplicated. #{invalid}"
|
||||
end
|
||||
|
||||
def validate_rule_lhs_is_nterm!
|
||||
errors = []
|
||||
|
||||
|
@ -6,18 +6,24 @@ module Lrama
|
||||
|
||||
# * ($$) yylval
|
||||
# * (@$) yylloc
|
||||
# * ($:$) error
|
||||
# * ($1) error
|
||||
# * (@1) error
|
||||
# * ($:1) error
|
||||
def reference_to_c(ref)
|
||||
case
|
||||
when ref.type == :dollar && ref.name == "$" # $$
|
||||
"yylval"
|
||||
when ref.type == :at && ref.name == "$" # @$
|
||||
"yylloc"
|
||||
when ref.type == :index && ref.name == "$" # $:$
|
||||
raise "$:#{ref.value} can not be used in initial_action."
|
||||
when ref.type == :dollar # $n
|
||||
raise "$#{ref.value} can not be used in initial_action."
|
||||
when ref.type == :at # @n
|
||||
raise "@#{ref.value} can not be used in initial_action."
|
||||
when ref.type == :index # $:n
|
||||
raise "$:#{ref.value} can not be used in initial_action."
|
||||
else
|
||||
raise "Unexpected. #{self}, #{ref}"
|
||||
end
|
||||
|
@ -6,14 +6,18 @@ module Lrama
|
||||
|
||||
# * ($$) error
|
||||
# * (@$) error
|
||||
# * ($:$) error
|
||||
# * ($1) error
|
||||
# * (@1) error
|
||||
# * ($:1) error
|
||||
def reference_to_c(ref)
|
||||
case
|
||||
when ref.type == :dollar # $$, $n
|
||||
raise "$#{ref.value} can not be used in #{type}."
|
||||
when ref.type == :at # @$, @n
|
||||
raise "@#{ref.value} can not be used in #{type}."
|
||||
when ref.type == :index # $:$, $:n
|
||||
raise "$:#{ref.value} can not be used in #{type}."
|
||||
else
|
||||
raise "Unexpected. #{self}, #{ref}"
|
||||
end
|
||||
|
@ -11,8 +11,10 @@ module Lrama
|
||||
|
||||
# * ($$) *yyvaluep
|
||||
# * (@$) *yylocationp
|
||||
# * ($:$) error
|
||||
# * ($1) error
|
||||
# * (@1) error
|
||||
# * ($:1) error
|
||||
def reference_to_c(ref)
|
||||
case
|
||||
when ref.type == :dollar && ref.name == "$" # $$
|
||||
@ -20,10 +22,14 @@ module Lrama
|
||||
"((*yyvaluep).#{member})"
|
||||
when ref.type == :at && ref.name == "$" # @$
|
||||
"(*yylocationp)"
|
||||
when ref.type == :index && ref.name == "$" # $:$
|
||||
raise "$:#{ref.value} can not be used in #{type}."
|
||||
when ref.type == :dollar # $n
|
||||
raise "$#{ref.value} can not be used in #{type}."
|
||||
when ref.type == :at # @n
|
||||
raise "@#{ref.value} can not be used in #{type}."
|
||||
when ref.type == :index # $:n
|
||||
raise "$:#{ref.value} can not be used in #{type}."
|
||||
else
|
||||
raise "Unexpected. #{self}, #{ref}"
|
||||
end
|
||||
|
@ -11,8 +11,10 @@ module Lrama
|
||||
|
||||
# * ($$) yyval
|
||||
# * (@$) yyloc
|
||||
# * ($:$) error
|
||||
# * ($1) yyvsp[i]
|
||||
# * (@1) yylsp[i]
|
||||
# * ($:1) i - 1
|
||||
#
|
||||
#
|
||||
# Consider a rule like
|
||||
@ -24,6 +26,8 @@ module Lrama
|
||||
# "Rule" class: keyword_class { $1 } tSTRING { $2 + $3 } keyword_end { $class = $1 + $keyword_end }
|
||||
# "Position in grammar" $1 $2 $3 $4 $5
|
||||
# "Index for yyvsp" -4 -3 -2 -1 0
|
||||
# "$:n" $:1 $:2 $:3 $:4 $:5
|
||||
# "index of $:n" -5 -4 -3 -2 -1
|
||||
#
|
||||
#
|
||||
# For the first midrule action:
|
||||
@ -31,6 +35,7 @@ module Lrama
|
||||
# "Rule" class: keyword_class { $1 } tSTRING { $2 + $3 } keyword_end { $class = $1 + $keyword_end }
|
||||
# "Position in grammar" $1
|
||||
# "Index for yyvsp" 0
|
||||
# "$:n" $:1
|
||||
def reference_to_c(ref)
|
||||
case
|
||||
when ref.type == :dollar && ref.name == "$" # $$
|
||||
@ -39,6 +44,8 @@ module Lrama
|
||||
"(yyval.#{tag.member})"
|
||||
when ref.type == :at && ref.name == "$" # @$
|
||||
"(yyloc)"
|
||||
when ref.type == :index && ref.name == "$" # $:$
|
||||
raise "$:$ is not supported"
|
||||
when ref.type == :dollar # $n
|
||||
i = -position_in_rhs + ref.index
|
||||
tag = ref.ex_tag || rhs[ref.index - 1].tag
|
||||
@ -47,6 +54,9 @@ module Lrama
|
||||
when ref.type == :at # @n
|
||||
i = -position_in_rhs + ref.index
|
||||
"(yylsp[#{i}])"
|
||||
when ref.type == :index # $:n
|
||||
i = -position_in_rhs + ref.index
|
||||
"(#{i} - 1)"
|
||||
else
|
||||
raise "Unexpected. #{self}, #{ref}"
|
||||
end
|
||||
@ -70,7 +80,7 @@ module Lrama
|
||||
end
|
||||
|
||||
def raise_tag_not_found_error(ref)
|
||||
raise "Tag is not specified for '$#{ref.value}' in '#{@rule.to_s}'"
|
||||
raise "Tag is not specified for '$#{ref.value}' in '#{@rule}'"
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -2,11 +2,12 @@ module Lrama
|
||||
class Grammar
|
||||
# type: :dollar or :at
|
||||
# name: String (e.g. $$, $foo, $expr.right)
|
||||
# index: Integer (e.g. $1)
|
||||
# number: Integer (e.g. $1)
|
||||
# index: Integer
|
||||
# ex_tag: "$<tag>1" (Optional)
|
||||
class Reference < Struct.new(:type, :name, :index, :ex_tag, :first_column, :last_column, keyword_init: true)
|
||||
class Reference < Struct.new(:type, :name, :number, :index, :ex_tag, :first_column, :last_column, keyword_init: true)
|
||||
def value
|
||||
name || index
|
||||
name || number
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -181,11 +181,18 @@ module Lrama
|
||||
if referring_symbol[1] == 0 # Refers to LHS
|
||||
ref.name = '$'
|
||||
else
|
||||
ref.index = referring_symbol[1]
|
||||
ref.number = referring_symbol[1]
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
if ref.number
|
||||
# TODO: When Inlining is implemented, for example, if `$1` is expanded to multiple RHS tokens,
|
||||
# `$2` needs to access `$2 + n` to actually access it. So, after the Inlining implementation,
|
||||
# the number will need to be resolved to an index.
|
||||
ref.index = ref.number
|
||||
end
|
||||
|
||||
# TODO: Need to check index of @ too?
|
||||
next if ref.type == :at
|
||||
|
||||
|
@ -11,7 +11,7 @@ module Lrama
|
||||
attr_reader :term
|
||||
attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
|
||||
|
||||
def initialize(id:, alias_name: nil, number: nil, tag: nil, term:, token_id: nil, nullable: nil, precedence: nil, printer: nil)
|
||||
def initialize(id:, term:, alias_name: nil, number: nil, tag: nil, token_id: nil, nullable: nil, precedence: nil, printer: nil)
|
||||
@id = id
|
||||
@alias_name = alias_name
|
||||
@number = number
|
||||
|
1
tool/lrama/lib/lrama/grammar/symbols.rb
Normal file
1
tool/lrama/lib/lrama/grammar/symbols.rb
Normal file
@ -0,0 +1 @@
|
||||
require_relative "symbols/resolver"
|
276
tool/lrama/lib/lrama/grammar/symbols/resolver.rb
Normal file
276
tool/lrama/lib/lrama/grammar/symbols/resolver.rb
Normal file
@ -0,0 +1,276 @@
|
||||
module Lrama
  class Grammar
    class Symbols
      # Registry of a grammar's terminal and nonterminal symbols.
      #
      # Terms and nterms are kept in two separate lists; #symbols exposes them
      # as one memoized array (terms first, then nterms). The resolver also
      # assigns symbol numbers and token ids, attaches %type tags, printers and
      # error tokens, and validates number / alias uniqueness.
      class Resolver
        # Token ids for character-literal bodies (the text between the single
        # quotes) that are written as an escape sequence or a quote character.
        CHAR_ESCAPE_TOKEN_IDS = {
          "\\b" => 8,
          "\\f" => 12,
          "\\n" => 10,
          "\\r" => 13,
          "\\t" => 9,
          "\\v" => 11,
          "\"" => 34,
          "'" => 39,
          "\\\\" => 92
        }.freeze
        private_constant :CHAR_ESCAPE_TOKEN_IDS

        attr_reader :terms, :nterms

        def initialize
          @terms = []
          @nterms = []
        end

        # All symbols, terms followed by nterms. The array is memoized and is
        # discarded whenever add_term / add_nterm registers a new symbol.
        def symbols
          @symbols ||= (@terms + @nterms)
        end

        # Sorts the memoized #symbols array in place by symbol number.
        def sort_by_number!
          symbols.sort_by!(&:number)
        end

        # Registers a terminal symbol and returns it.
        #
        # If a symbol with the same +token_id+ already exists it is returned
        # instead (after overwriting its id, alias_name and tag when +replace+
        # is true). Otherwise, if a symbol matching +id+ exists it is returned
        # unchanged. Only when neither lookup succeeds is a new term created.
        def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
          if token_id && (sym = find_symbol_by_token_id(token_id))
            if replace
              sym.id = id
              sym.alias_name = alias_name
              sym.tag = tag
            end

            return sym
          end

          if (sym = find_symbol_by_id(id))
            return sym
          end

          @symbols = nil
          term = Symbol.new(
            id: id, alias_name: alias_name, number: nil, tag: tag,
            term: true, token_id: token_id, nullable: false
          )
          @terms << term
          term
        end

        # Registers a nonterminal symbol and returns it.
        # Returns nil (and registers nothing) when a symbol matching +id+
        # already exists.
        def add_nterm(id:, alias_name: nil, tag: nil)
          return if find_symbol_by_id(id)

          @symbols = nil
          nterm = Symbol.new(
            id: id, alias_name: alias_name, number: nil, tag: tag,
            term: false, token_id: nil, nullable: nil
          )
          @nterms << nterm
          nterm
        end

        # Returns the first symbol whose id has the given s_value, or nil.
        def find_symbol_by_s_value(s_value)
          symbols.find { |s| s.id.s_value == s_value }
        end

        # Same as #find_symbol_by_s_value but raises when nothing matches.
        def find_symbol_by_s_value!(s_value)
          find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
        end

        # Returns the first symbol whose id equals +id+ or whose alias name
        # equals +id+'s s_value, or nil.
        def find_symbol_by_id(id)
          symbols.find do |s|
            s.id == id || s.alias_name == id.s_value
          end
        end

        # Same as #find_symbol_by_id but raises when nothing matches.
        def find_symbol_by_id!(id)
          find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
        end

        # Returns the first symbol with the given token id, or nil.
        def find_symbol_by_token_id(token_id)
          symbols.find { |s| s.token_id == token_id }
        end

        # Looks a symbol up by position in #symbols and checks that its number
        # equals that position; raises if the slot is empty or the numbers
        # disagree.
        def find_symbol_by_number!(number)
          sym = symbols[number]

          raise "Symbol not found: #{number}" unless sym
          raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number

          sym
        end

        # Fills #number and #token_id of every symbol that lacks them,
        # terms first and nterms afterwards.
        def fill_symbol_number
          # YYEMPTY = -2
          # YYEOF   =  0
          # YYerror =  1
          # YYUNDEF =  2
          @number = 3
          fill_terms_number
          fill_nterms_number
        end

        # Copies each type's tag onto the nterm with the same id.
        # Raises when no such nterm exists.
        def fill_nterm_type(types)
          types.each do |type|
            nterm = find_nterm_by_id!(type.id)
            nterm.tag = type.tag
          end
        end

        # Attaches each printer to the symbols named by its ident_or_tags.
        def fill_printer(printers)
          each_declared_symbol(printers) { |sym, printer| sym.printer = printer }
        end

        # Attaches each error token to the symbols named by its ident_or_tags.
        def fill_error_token(error_tokens)
          each_declared_symbol(error_tokens) { |sym, token| sym.error_token = token }
        end

        # Resolves a lexer token to its symbol; raises for any other object.
        def token_to_symbol(token)
          case token
          when Lrama::Lexer::Token
            find_symbol_by_id!(token)
          else
            raise "Unknown class: #{token}"
          end
        end

        # Raises when two symbols share a number or an alias name.
        def validate!
          validate_number_uniqueness!
          validate_alias_name_uniqueness!
        end

        private

        def find_nterm_by_id!(id)
          @nterms.find do |s|
            s.id == id
          end || (raise "Symbol not found: #{id}")
        end

        # Yields (symbol, declaration) for every symbol that one of the
        # declaration's ident_or_tags refers to: an Ident matches the symbol's
        # id, a Tag matches the symbol's tag. Any other entry raises.
        def each_declared_symbol(declarations)
          symbols.each do |sym|
            declarations.each do |declaration|
              declaration.ident_or_tags.each do |ident_or_tag|
                case ident_or_tag
                when Lrama::Lexer::Token::Ident
                  yield sym, declaration if sym.id == ident_or_tag
                when Lrama::Lexer::Token::Tag
                  yield sym, declaration if sym.tag == ident_or_tag
                else
                  raise "Unknown token type. #{declaration}"
                end
              end
            end
          end
        end

        # Advances @number past numbers already in use and, when +sym+ has no
        # number yet, gives it the next free one.
        def assign_number(sym)
          @number += 1 while used_numbers[@number]
          return unless sym.number.nil?

          sym.number = @number
          used_numbers[@number] = true
          @number += 1
        end

        # Token id of a character-literal term, derived from the literal text.
        # Raises when the literal is neither a known escape, an octal escape
        # nor a single character.
        def char_token_id(sym)
          # Ignore ' on both sides
          literal = sym.id.s_value[1..-2]
          escaped = CHAR_ESCAPE_TOKEN_IDS[literal]
          return escaped if escaped

          id =
            case literal
            when /\A\\(\d+)\z/ then Integer($1, 8)
            when /\A(.)\z/ then $1&.bytes&.first
            end
          raise "Unknown Char s_value #{sym}" if id.nil?

          id
        end

        def fill_terms_number
          # Character literal in grammar file has
          # token id corresponding to ASCII code by default,
          # so start token_id from 256.
          token_id = 256

          @terms.each do |sym|
            assign_number(sym)
            next unless sym.token_id.nil?

            # If id is Token::Char, it uses ASCII code
            if sym.id.is_a?(Lrama::Lexer::Token::Char)
              sym.token_id = char_token_id(sym)
            else
              sym.token_id = token_id
              token_id += 1
            end
          end
        end

        def fill_nterms_number
          token_id = 0

          @nterms.each do |sym|
            assign_number(sym)
            next unless sym.token_id.nil?

            sym.token_id = token_id
            token_id += 1
          end
        end

        # Set (as a Hash) of the numbers symbols already carried when this was
        # first called; assign_number adds to it as it hands out new numbers.
        def used_numbers
          return @used_numbers if defined?(@used_numbers)

          @used_numbers = {}
          symbols.each { |sym| @used_numbers[sym.number] = true }
          @used_numbers
        end

        def validate_number_uniqueness!
          invalid = symbols.group_by(&:number).select do |_number, syms|
            syms.count > 1
          end

          return if invalid.empty?

          raise "Symbol number is duplicated. #{invalid}"
        end

        def validate_alias_name_uniqueness!
          invalid = symbols.select(&:alias_name).group_by(&:alias_name).select do |_alias_name, syms|
            syms.count > 1
          end

          return if invalid.empty?

          raise "Symbol alias name is duplicated. #{invalid}"
        end
      end
    end
  end
end
|
@ -1,4 +1,5 @@
|
||||
require "strscan"
|
||||
|
||||
require "lrama/lexer/grammar_file"
|
||||
require "lrama/lexer/location"
|
||||
require "lrama/lexer/token"
|
||||
@ -26,6 +27,11 @@ module Lrama
|
||||
%precedence
|
||||
%prec
|
||||
%error-token
|
||||
%before-reduce
|
||||
%after-reduce
|
||||
%after-shift-error-token
|
||||
%after-shift
|
||||
%after-pop-stack
|
||||
%empty
|
||||
%code
|
||||
%rule
|
||||
|
@ -38,7 +38,7 @@ module Lrama
|
||||
return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos)
|
||||
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
|
||||
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
||||
return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos)
|
||||
return Lrama::Grammar::Reference.new(type: :dollar, number: Integer(scanner[2]), index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos)
|
||||
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
|
||||
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
|
||||
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
|
||||
@ -51,11 +51,22 @@ module Lrama
|
||||
when scanner.scan(/@\$/) # @$
|
||||
return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos)
|
||||
when scanner.scan(/@(\d+)/) # @1
|
||||
return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
|
||||
return Lrama::Grammar::Reference.new(type: :at, number: Integer(scanner[1]), index: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
|
||||
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
|
||||
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
|
||||
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
|
||||
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
|
||||
|
||||
# $: references
|
||||
when scanner.scan(/\$:\$/) # $:$
|
||||
return Lrama::Grammar::Reference.new(type: :index, name: "$", first_column: start, last_column: scanner.pos)
|
||||
when scanner.scan(/\$:(\d+)/) # $:1
|
||||
return Lrama::Grammar::Reference.new(type: :index, number: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
|
||||
when scanner.scan(/\$:([a-zA-Z_][a-zA-Z0-9_]*)/) # $:foo, $:expr (named reference without brackets)
|
||||
return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos)
|
||||
when scanner.scan(/\$:\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $:[expr.right], $:[expr-right] (named reference with brackets)
|
||||
return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos)
|
||||
|
||||
end
|
||||
end
|
||||
end
|
||||
|
@ -16,8 +16,7 @@ module Lrama
|
||||
|
||||
def initialize(
|
||||
out:, output_file_path:, template_name:, grammar_file_path:,
|
||||
header_out: nil, header_file_path: nil,
|
||||
context:, grammar:, error_recovery: false
|
||||
context:, grammar:, header_out: nil, header_file_path: nil, error_recovery: false
|
||||
)
|
||||
@out = out
|
||||
@output_file_path = output_file_path
|
||||
@ -162,6 +161,61 @@ module Lrama
|
||||
STR
|
||||
end
|
||||
|
||||
# Builds the text emitted for the `%after-shift` callback.
# Returns "" when the grammar has no after_shift entry. Otherwise returns a
# heredoc holding +comment+, a `#line` directive built from the entry's line
# and the grammar file path, a braced call to the registered function with
# `parse_param_name` interpolated as its argument list, and a
# `#line [@oline@] [@ofile@]` line.
# NOTE(review): `[@oline@]` / `[@ofile@]` look like placeholders substituted
# at a later stage — confirm.
def after_shift_function(comment = "")
|
||||
return "" unless @grammar.after_shift
|
||||
|
||||
<<-STR
|
||||
#{comment}
|
||||
#line #{@grammar.after_shift.line} "#{@grammar_file_path}"
|
||||
{#{@grammar.after_shift.s_value}(#{parse_param_name});}
|
||||
#line [@oline@] [@ofile@]
|
||||
STR
|
||||
end
|
||||
|
||||
# Builds the text emitted for the `%before-reduce` callback.
# Returns "" when the grammar has no before_reduce entry. Otherwise returns a
# heredoc holding +comment+, a `#line` directive built from the entry's line
# and the grammar file path, a braced call to the registered function whose
# arguments are `yylen` followed by the interpolated `user_args`, and a
# `#line [@oline@] [@ofile@]` line.
def before_reduce_function(comment = "")
|
||||
return "" unless @grammar.before_reduce
|
||||
|
||||
<<-STR
|
||||
#{comment}
|
||||
#line #{@grammar.before_reduce.line} "#{@grammar_file_path}"
|
||||
{#{@grammar.before_reduce.s_value}(yylen#{user_args});}
|
||||
#line [@oline@] [@ofile@]
|
||||
STR
|
||||
end
|
||||
|
||||
# Builds the text emitted for the `%after-reduce` callback.
# Returns "" when the grammar has no after_reduce entry. Otherwise returns a
# heredoc holding +comment+, a `#line` directive built from the entry's line
# and the grammar file path, a braced call to the registered function whose
# arguments are `yylen` followed by the interpolated `user_args`, and a
# `#line [@oline@] [@ofile@]` line.
def after_reduce_function(comment = "")
|
||||
return "" unless @grammar.after_reduce
|
||||
|
||||
<<-STR
|
||||
#{comment}
|
||||
#line #{@grammar.after_reduce.line} "#{@grammar_file_path}"
|
||||
{#{@grammar.after_reduce.s_value}(yylen#{user_args});}
|
||||
#line [@oline@] [@ofile@]
|
||||
STR
|
||||
end
|
||||
|
||||
# Builds the text emitted for the `%after-shift-error-token` callback.
# Returns "" when the grammar has no after_shift_error_token entry. Otherwise
# returns a heredoc holding +comment+, a `#line` directive built from the
# entry's line and the grammar file path, a braced call to the registered
# function with `parse_param_name` interpolated as its argument list, and a
# `#line [@oline@] [@ofile@]` line.
def after_shift_error_token_function(comment = "")
|
||||
return "" unless @grammar.after_shift_error_token
|
||||
|
||||
<<-STR
|
||||
#{comment}
|
||||
#line #{@grammar.after_shift_error_token.line} "#{@grammar_file_path}"
|
||||
{#{@grammar.after_shift_error_token.s_value}(#{parse_param_name});}
|
||||
#line [@oline@] [@ofile@]
|
||||
STR
|
||||
end
|
||||
|
||||
# Builds the text emitted for the `%after-pop-stack` callback.
# +len+ is interpolated verbatim as the first argument of the generated call.
# Returns "" when the grammar has no after_pop_stack entry. Otherwise returns
# a heredoc holding +comment+, a `#line` directive built from the entry's line
# and the grammar file path, a braced call to the registered function whose
# arguments are +len+ followed by the interpolated `user_args`, and a
# `#line [@oline@] [@ofile@]` line.
def after_pop_stack_function(len, comment = "")
|
||||
return "" unless @grammar.after_pop_stack
|
||||
|
||||
<<-STR
|
||||
#{comment}
|
||||
#line #{@grammar.after_pop_stack.line} "#{@grammar_file_path}"
|
||||
{#{@grammar.after_pop_stack.s_value}(#{len}#{user_args});}
|
||||
#line [@oline@] [@ofile@]
|
||||
STR
|
||||
end
|
||||
|
||||
def symbol_actions_for_error_token
|
||||
str = ""
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -29,8 +29,8 @@ module Lrama
|
||||
end
|
||||
|
||||
def non_default_reduces
|
||||
reduces.select do |reduce|
|
||||
reduce.rule != @default_reduction_rule
|
||||
reduces.reject do |reduce|
|
||||
reduce.rule == @default_reduction_rule
|
||||
end
|
||||
end
|
||||
|
||||
@ -105,8 +105,8 @@ module Lrama
|
||||
end
|
||||
|
||||
def selected_term_transitions
|
||||
term_transitions.select do |shift, next_state|
|
||||
!shift.not_selected
|
||||
term_transitions.reject do |shift, next_state|
|
||||
shift.not_selected
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -1,8 +1,14 @@
|
||||
# TODO: Validate position is not over rule rhs
|
||||
|
||||
require "forwardable"
|
||||
|
||||
module Lrama
|
||||
class States
|
||||
class Item < Struct.new(:rule, :position, keyword_init: true)
|
||||
extend Forwardable
|
||||
|
||||
def_delegators "rule", :lhs, :rhs
|
||||
|
||||
# Optimization for States#setup_state
|
||||
def hash
|
||||
[rule_id, position].hash
|
||||
@ -20,14 +26,6 @@ module Lrama
|
||||
rhs.count - position
|
||||
end
|
||||
|
||||
def lhs
|
||||
rule.lhs
|
||||
end
|
||||
|
||||
def rhs
|
||||
rule.rhs
|
||||
end
|
||||
|
||||
def next_sym
|
||||
rhs[position]
|
||||
end
|
||||
|
@ -109,8 +109,8 @@ module Lrama
|
||||
io << "\n"
|
||||
|
||||
# Report shifts
|
||||
tmp = state.term_transitions.select do |shift, _|
|
||||
!shift.not_selected
|
||||
tmp = state.term_transitions.reject do |shift, _|
|
||||
shift.not_selected
|
||||
end.map do |shift, next_state|
|
||||
[shift.next_sym, next_state.id]
|
||||
end
|
||||
|
@ -1,3 +1,3 @@
|
||||
module Lrama
|
||||
VERSION = "0.6.2".freeze
|
||||
VERSION = "0.6.3".freeze
|
||||
end
|
||||
|
@ -1752,6 +1752,7 @@ yybackup:
|
||||
*++yyvsp = yylval;
|
||||
YY_IGNORE_MAYBE_UNINITIALIZED_END
|
||||
*++yylsp = yylloc;
|
||||
<%= output.after_shift_function("/* %after-shift code. */") %>
|
||||
|
||||
/* Discard the shifted token. */
|
||||
yychar = YYEMPTY;
|
||||
@ -1784,6 +1785,7 @@ yyreduce:
|
||||
unconditionally makes the parser a bit smaller, and it avoids a
|
||||
GCC warning that YYVAL may be used uninitialized. */
|
||||
yyval = yyvsp[1-yylen];
|
||||
<%= output.before_reduce_function("/* %before-reduce function. */") %>
|
||||
|
||||
/* Default location. */
|
||||
YYLLOC_DEFAULT (yyloc, (yylsp - yylen), yylen);
|
||||
@ -1809,6 +1811,7 @@ yyreduce:
|
||||
YY_SYMBOL_PRINT ("-> $$ =", YY_CAST (yysymbol_kind_t, yyr1[yyn]), &yyval, &yyloc<%= output.user_args %>);
|
||||
|
||||
YYPOPSTACK (yylen);
|
||||
<%= output.after_reduce_function("/* %after-reduce function. */") %>
|
||||
yylen = 0;
|
||||
|
||||
*++yyvsp = yyval;
|
||||
@ -1910,6 +1913,7 @@ yyerrorlab:
|
||||
/* Do not reclaim the symbols of the rule whose action triggered
|
||||
this YYERROR. */
|
||||
YYPOPSTACK (yylen);
|
||||
<%= output.after_pop_stack_function("yylen", "/* %after-pop-stack function. */") %>
|
||||
yylen = 0;
|
||||
YY_STACK_PRINT (yyss, yyssp<%= output.user_args %>);
|
||||
yystate = *yyssp;
|
||||
@ -1969,6 +1973,7 @@ yyerrlab1:
|
||||
yydestruct ("Error: popping",
|
||||
YY_ACCESSING_SYMBOL (yystate), yyvsp, yylsp<%= output.user_args %>);
|
||||
YYPOPSTACK (1);
|
||||
<%= output.after_pop_stack_function(1, "/* %after-pop-stack function. */") %>
|
||||
yystate = *yyssp;
|
||||
YY_STACK_PRINT (yyss, yyssp<%= output.user_args %>);
|
||||
}
|
||||
@ -1983,6 +1988,7 @@ yyerrlab1:
|
||||
|
||||
/* Shift the error token. */
|
||||
YY_SYMBOL_PRINT ("Shifting", YY_ACCESSING_SYMBOL (yyn), yyvsp, yylsp<%= output.user_args %>);
|
||||
<%= output.after_shift_error_token_function("/* %after-shift-error-token code. */") %>
|
||||
|
||||
yystate = yyn;
|
||||
goto yynewstate;
|
||||
|
Loading…
x
Reference in New Issue
Block a user