Lrama v0.6.3

This commit is contained in:
yui-knk 2024-02-14 21:55:30 +09:00 committed by Yuichiro Kaneko
parent 95ec71efc5
commit 06ad00adc2
21 changed files with 1030 additions and 766 deletions

View File

@ -1,5 +1,39 @@
# NEWS for Lrama # NEWS for Lrama
## Lrama 0.6.3 (2024-02-15)
### Bring Your Own Stack
Provide functionalities for Bring Your Own Stack.
Ruby's Ripper library requires its own semantic value stack to manage Ruby objects returned by user-defined callback methods. Currently Ripper uses the semantic value stack (`yyvsa`) which is used by the parser to manage Nodes. This hack introduces some limitations on Ripper. For example, Ripper cannot execute semantic analysis that depends on the Node structure.
Lrama introduces two features that let parser generator users maintain another semantic value stack.
1. Callback entry points
Users can emulate a semantic value stack with these callbacks.
Lrama provides these five callbacks. Registered functions are called when each event happens. For example, the `%after-shift` function is called when a shift happens on the original semantic value stack.
* `%after-shift` function_name
* `%before-reduce` function_name
* `%after-reduce` function_name
* `%after-shift-error-token` function_name
* `%after-pop-stack` function_name
2. `$:n` variable to access the index of each grammar symbol
Users also need to access the semantic values on their own stack in grammar actions. `$:n` provides a way to access them. `$:n` is translated to a negative index from the top of the stack.
For example
```
primary: k_if expr_value then compstmt if_tail k_end
{
/*% ripper: if!($:2, $:4, $:5) %*/
/* $:2 = -5, $:4 = -3, $:5 = -2. */
}
```
## Lrama 0.6.2 (2024-01-27) ## Lrama 0.6.2 (2024-01-27)
### %no-stdlib directive ### %no-stdlib directive

View File

@ -265,9 +265,9 @@ module Lrama
s = actions.each_with_index.map do |n, i| s = actions.each_with_index.map do |n, i|
[i, n] [i, n]
end.select do |i, n| end.reject do |i, n|
# Remove default_reduction_rule entries # Remove default_reduction_rule entries
n != 0 n == 0
end end
if s.count != 0 if s.count != 0
@ -462,7 +462,7 @@ module Lrama
@yylast = high @yylast = high
# replace_ninf # replace_ninf
@yypact_ninf = (@base.select {|i| i != BaseMin } + [0]).min - 1 @yypact_ninf = (@base.reject {|i| i == BaseMin } + [0]).min - 1
@base.map! do |i| @base.map! do |i|
case i case i
when BaseMin when BaseMin
@ -472,7 +472,7 @@ module Lrama
end end
end end
@yytable_ninf = (@table.compact.select {|i| i != ErrorActionNumber } + [0]).min - 1 @yytable_ninf = (@table.compact.reject {|i| i == ErrorActionNumber } + [0]).min - 1
@table.map! do |i| @table.map! do |i|
case i case i
when nil when nil

View File

@ -1,16 +1,18 @@
require "forwardable"
require "lrama/grammar/auxiliary" require "lrama/grammar/auxiliary"
require "lrama/grammar/binding" require "lrama/grammar/binding"
require "lrama/grammar/code" require "lrama/grammar/code"
require "lrama/grammar/counter" require "lrama/grammar/counter"
require "lrama/grammar/error_token" require "lrama/grammar/error_token"
require "lrama/grammar/parameterizing_rule"
require "lrama/grammar/percent_code" require "lrama/grammar/percent_code"
require "lrama/grammar/precedence" require "lrama/grammar/precedence"
require "lrama/grammar/printer" require "lrama/grammar/printer"
require "lrama/grammar/reference" require "lrama/grammar/reference"
require "lrama/grammar/rule" require "lrama/grammar/rule"
require "lrama/grammar/rule_builder" require "lrama/grammar/rule_builder"
require "lrama/grammar/parameterizing_rule"
require "lrama/grammar/symbol" require "lrama/grammar/symbol"
require "lrama/grammar/symbols"
require "lrama/grammar/type" require "lrama/grammar/type"
require "lrama/grammar/union" require "lrama/grammar/union"
require "lrama/lexer" require "lrama/lexer"
@ -18,14 +20,23 @@ require "lrama/lexer"
module Lrama module Lrama
# Grammar is the result of parsing an input grammar file # Grammar is the result of parsing an input grammar file
class Grammar class Grammar
extend Forwardable
attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
attr_accessor :union, :expect, attr_accessor :union, :expect,
:printers, :error_tokens, :printers, :error_tokens,
:lex_param, :parse_param, :initial_action, :lex_param, :parse_param, :initial_action,
:symbols, :types, :after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack,
:symbols_resolver, :types,
:rules, :rule_builders, :rules, :rule_builders,
:sym_to_rules, :no_stdlib :sym_to_rules, :no_stdlib
def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term,
:find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol,
:find_symbol_by_s_value!, :fill_symbol_number, :fill_nterm_type,
:fill_printer, :fill_error_token, :sort_by_number!
def initialize(rule_counter) def initialize(rule_counter)
@rule_counter = rule_counter @rule_counter = rule_counter
@ -33,7 +44,7 @@ module Lrama
@percent_codes = [] @percent_codes = []
@printers = [] @printers = []
@error_tokens = [] @error_tokens = []
@symbols = [] @symbols_resolver = Grammar::Symbols::Resolver.new
@types = [] @types = []
@rule_builders = [] @rule_builders = []
@rules = [] @rules = []
@ -62,44 +73,6 @@ module Lrama
@error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno) @error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
end end
def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
if token_id && (sym = @symbols.find {|s| s.token_id == token_id })
if replace
sym.id = id
sym.alias_name = alias_name
sym.tag = tag
end
return sym
end
if (sym = @symbols.find {|s| s.id == id })
return sym
end
sym = Symbol.new(
id: id, alias_name: alias_name, number: nil, tag: tag,
term: true, token_id: token_id, nullable: false
)
@symbols << sym
@terms = nil
return sym
end
def add_nterm(id:, alias_name: nil, tag: nil)
return if @symbols.find {|s| s.id == id }
sym = Symbol.new(
id: id, alias_name: alias_name, number: nil, tag: tag,
term: false, token_id: nil, nullable: nil,
)
@symbols << sym
@nterms = nil
return sym
end
def add_type(id:, tag:) def add_type(id:, tag:)
@types << Type.new(id: id, tag: tag) @types << Type.new(id: id, tag: tag)
end end
@ -165,13 +138,9 @@ module Lrama
normalize_rules normalize_rules
collect_symbols collect_symbols
set_lhs_and_rhs set_lhs_and_rhs
fill_symbol_number
fill_default_precedence fill_default_precedence
fill_symbols
fill_sym_to_rules fill_sym_to_rules
fill_nterm_type
fill_symbol_printer
fill_symbol_error_token
@symbols.sort_by!(&:number)
compute_nullable compute_nullable
compute_first_set compute_first_set
end end
@ -180,40 +149,10 @@ module Lrama
# #
# * Validation for no_declared_type_reference # * Validation for no_declared_type_reference
def validate! def validate!
validate_symbol_number_uniqueness! @symbols_resolver.validate!
validate_symbol_alias_name_uniqueness!
validate_rule_lhs_is_nterm! validate_rule_lhs_is_nterm!
end end
def find_symbol_by_s_value(s_value)
@symbols.find do |sym|
sym.id.s_value == s_value
end
end
def find_symbol_by_s_value!(s_value)
find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
end
def find_symbol_by_id(id)
@symbols.find do |sym|
sym.id == id || sym.alias_name == id.s_value
end
end
def find_symbol_by_id!(id)
find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
end
def find_symbol_by_number!(number)
sym = @symbols[number]
raise "Symbol not found: #{number}" unless sym
raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number
sym
end
def find_rules_by_symbol!(sym) def find_rules_by_symbol!(sym)
find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found") find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found")
end end
@ -222,22 +161,6 @@ module Lrama
@sym_to_rules[sym.number] @sym_to_rules[sym.number]
end end
def terms_count
terms.count
end
def terms
@terms ||= @symbols.select(&:term?)
end
def nterms_count
nterms.count
end
def nterms
@nterms ||= @symbols.select(&:nterm?)
end
private private
def compute_nullable def compute_nullable
@ -284,7 +207,7 @@ module Lrama
rule.nullable = false rule.nullable = false
end end
nterms.select {|r| r.nullable.nil? }.each do |nterm| nterms.select {|e| e.nullable.nil? }.each do |nterm|
nterm.nullable = false nterm.nullable = false
end end
end end
@ -330,12 +253,6 @@ module Lrama
end end
end end
def find_nterm_by_id!(id)
nterms.find do |nterm|
nterm.id == id
end || (raise "Nterm not found: #{id}")
end
def append_special_symbols def append_special_symbols
# YYEMPTY (token_id: -2, number: -2) is added when a template is evaluated # YYEMPTY (token_id: -2, number: -2) is added when a template is evaluated
# term = add_term(id: Token.new(Token::Ident, "YYEMPTY"), token_id: -2) # term = add_term(id: Token.new(Token::Ident, "YYEMPTY"), token_id: -2)
@ -397,79 +314,6 @@ module Lrama
end end
end end
# Fill #number and #token_id
def fill_symbol_number
# Character literal in grammar file has
# token id corresponding to ASCII code by default,
# so start token_id from 256.
token_id = 256
# YYEMPTY = -2
# YYEOF = 0
# YYerror = 1
# YYUNDEF = 2
number = 3
nterm_token_id = 0
used_numbers = {}
@symbols.map(&:number).each do |n|
used_numbers[n] = true
end
(@symbols.select(&:term?) + @symbols.select(&:nterm?)).each do |sym|
while used_numbers[number] do
number += 1
end
if sym.number.nil?
sym.number = number
number += 1
end
# If id is Token::Char, it uses ASCII code
if sym.term? && sym.token_id.nil?
if sym.id.is_a?(Lrama::Lexer::Token::Char)
# Ignore ' on the both sides
case sym.id.s_value[1..-2]
when "\\b"
sym.token_id = 8
when "\\f"
sym.token_id = 12
when "\\n"
sym.token_id = 10
when "\\r"
sym.token_id = 13
when "\\t"
sym.token_id = 9
when "\\v"
sym.token_id = 11
when "\""
sym.token_id = 34
when "'"
sym.token_id = 39
when "\\\\"
sym.token_id = 92
when /\A\\(\d+)\z/
sym.token_id = Integer($1, 8)
when /\A(.)\z/
sym.token_id = $1.bytes.first
else
raise "Unknown Char s_value #{sym}"
end
else
sym.token_id = token_id
token_id += 1
end
end
if sym.nterm? && sym.token_id.nil?
sym.token_id = nterm_token_id
nterm_token_id += 1
end
end
end
def set_lhs_and_rhs def set_lhs_and_rhs
@rules.each do |rule| @rules.each do |rule|
rule.lhs = token_to_symbol(rule._lhs) if rule._lhs rule.lhs = token_to_symbol(rule._lhs) if rule._lhs
@ -480,15 +324,6 @@ module Lrama
end end
end end
def token_to_symbol(token)
case token
when Lrama::Lexer::Token
find_symbol_by_id!(token)
else
raise "Unknown class: #{token}"
end
end
# Rule inherits precedence from the last term in RHS. # Rule inherits precedence from the last term in RHS.
# #
# https://www.gnu.org/software/bison/manual/html_node/How-Precedence.html # https://www.gnu.org/software/bison/manual/html_node/How-Precedence.html
@ -506,6 +341,14 @@ module Lrama
end end
end end
# Completes the per-symbol data in one pass over the symbols resolver.
# Every call below is a method delegated to @symbols_resolver; the sequence
# matters because sorting by number must happen after numbers are assigned.
def fill_symbols
# Assign #number and #token_id to terms and nterms that lack them
fill_symbol_number
# Copy tags declared in @types onto the matching nterms
fill_nterm_type(@types)
# Attach printers to symbols whose id or tag matches
fill_printer(@printers)
# Attach error tokens to symbols whose id or tag matches
fill_error_token(@error_tokens)
# Reorder the symbol list by symbol number
sort_by_number!
end
def fill_sym_to_rules def fill_sym_to_rules
@rules.each do |rule| @rules.each do |rule|
key = rule.lhs.number key = rule.lhs.number
@ -514,68 +357,6 @@ module Lrama
end end
end end
# Fill nterm's tag defined by %type decl
def fill_nterm_type
@types.each do |type|
nterm = find_nterm_by_id!(type.id)
nterm.tag = type.tag
end
end
def fill_symbol_printer
@symbols.each do |sym|
@printers.each do |printer|
printer.ident_or_tags.each do |ident_or_tag|
case ident_or_tag
when Lrama::Lexer::Token::Ident
sym.printer = printer if sym.id == ident_or_tag
when Lrama::Lexer::Token::Tag
sym.printer = printer if sym.tag == ident_or_tag
else
raise "Unknown token type. #{printer}"
end
end
end
end
end
def fill_symbol_error_token
@symbols.each do |sym|
@error_tokens.each do |error_token|
error_token.ident_or_tags.each do |ident_or_tag|
case ident_or_tag
when Lrama::Lexer::Token::Ident
sym.error_token = error_token if sym.id == ident_or_tag
when Lrama::Lexer::Token::Tag
sym.error_token = error_token if sym.tag == ident_or_tag
else
raise "Unknown token type. #{error_token}"
end
end
end
end
end
def validate_symbol_number_uniqueness!
invalid = @symbols.group_by(&:number).select do |number, syms|
syms.count > 1
end
return if invalid.empty?
raise "Symbol number is duplicated. #{invalid}"
end
def validate_symbol_alias_name_uniqueness!
invalid = @symbols.select(&:alias_name).group_by(&:alias_name).select do |alias_name, syms|
syms.count > 1
end
return if invalid.empty?
raise "Symbol alias name is duplicated. #{invalid}"
end
def validate_rule_lhs_is_nterm! def validate_rule_lhs_is_nterm!
errors = [] errors = []

View File

@ -6,18 +6,24 @@ module Lrama
# * ($$) yylval # * ($$) yylval
# * (@$) yylloc # * (@$) yylloc
# * ($:$) error
# * ($1) error # * ($1) error
# * (@1) error # * (@1) error
# * ($:1) error
def reference_to_c(ref) def reference_to_c(ref)
case case
when ref.type == :dollar && ref.name == "$" # $$ when ref.type == :dollar && ref.name == "$" # $$
"yylval" "yylval"
when ref.type == :at && ref.name == "$" # @$ when ref.type == :at && ref.name == "$" # @$
"yylloc" "yylloc"
when ref.type == :index && ref.name == "$" # $:$
raise "$:#{ref.value} can not be used in initial_action."
when ref.type == :dollar # $n when ref.type == :dollar # $n
raise "$#{ref.value} can not be used in initial_action." raise "$#{ref.value} can not be used in initial_action."
when ref.type == :at # @n when ref.type == :at # @n
raise "@#{ref.value} can not be used in initial_action." raise "@#{ref.value} can not be used in initial_action."
when ref.type == :index # $:n
raise "$:#{ref.value} can not be used in initial_action."
else else
raise "Unexpected. #{self}, #{ref}" raise "Unexpected. #{self}, #{ref}"
end end

View File

@ -6,14 +6,18 @@ module Lrama
# * ($$) error # * ($$) error
# * (@$) error # * (@$) error
# * ($:$) error
# * ($1) error # * ($1) error
# * (@1) error # * (@1) error
# * ($:1) error
def reference_to_c(ref) def reference_to_c(ref)
case case
when ref.type == :dollar # $$, $n when ref.type == :dollar # $$, $n
raise "$#{ref.value} can not be used in #{type}." raise "$#{ref.value} can not be used in #{type}."
when ref.type == :at # @$, @n when ref.type == :at # @$, @n
raise "@#{ref.value} can not be used in #{type}." raise "@#{ref.value} can not be used in #{type}."
when ref.type == :index # $:$, $:n
raise "$:#{ref.value} can not be used in #{type}."
else else
raise "Unexpected. #{self}, #{ref}" raise "Unexpected. #{self}, #{ref}"
end end

View File

@ -11,8 +11,10 @@ module Lrama
# * ($$) *yyvaluep # * ($$) *yyvaluep
# * (@$) *yylocationp # * (@$) *yylocationp
# * ($:$) error
# * ($1) error # * ($1) error
# * (@1) error # * (@1) error
# * ($:1) error
def reference_to_c(ref) def reference_to_c(ref)
case case
when ref.type == :dollar && ref.name == "$" # $$ when ref.type == :dollar && ref.name == "$" # $$
@ -20,10 +22,14 @@ module Lrama
"((*yyvaluep).#{member})" "((*yyvaluep).#{member})"
when ref.type == :at && ref.name == "$" # @$ when ref.type == :at && ref.name == "$" # @$
"(*yylocationp)" "(*yylocationp)"
when ref.type == :index && ref.name == "$" # $:$
raise "$:#{ref.value} can not be used in #{type}."
when ref.type == :dollar # $n when ref.type == :dollar # $n
raise "$#{ref.value} can not be used in #{type}." raise "$#{ref.value} can not be used in #{type}."
when ref.type == :at # @n when ref.type == :at # @n
raise "@#{ref.value} can not be used in #{type}." raise "@#{ref.value} can not be used in #{type}."
when ref.type == :index # $:n
raise "$:#{ref.value} can not be used in #{type}."
else else
raise "Unexpected. #{self}, #{ref}" raise "Unexpected. #{self}, #{ref}"
end end

View File

@ -11,8 +11,10 @@ module Lrama
# * ($$) yyval # * ($$) yyval
# * (@$) yyloc # * (@$) yyloc
# * ($:$) error
# * ($1) yyvsp[i] # * ($1) yyvsp[i]
# * (@1) yylsp[i] # * (@1) yylsp[i]
# * ($:1) i - 1
# #
# #
# Consider a rule like # Consider a rule like
@ -24,6 +26,8 @@ module Lrama
# "Rule" class: keyword_class { $1 } tSTRING { $2 + $3 } keyword_end { $class = $1 + $keyword_end } # "Rule" class: keyword_class { $1 } tSTRING { $2 + $3 } keyword_end { $class = $1 + $keyword_end }
# "Position in grammar" $1 $2 $3 $4 $5 # "Position in grammar" $1 $2 $3 $4 $5
# "Index for yyvsp" -4 -3 -2 -1 0 # "Index for yyvsp" -4 -3 -2 -1 0
# "$:n" $:1 $:2 $:3 $:4 $:5
# "index of $:n" -5 -4 -3 -2 -1
# #
# #
# For the first midrule action: # For the first midrule action:
@ -31,6 +35,7 @@ module Lrama
# "Rule" class: keyword_class { $1 } tSTRING { $2 + $3 } keyword_end { $class = $1 + $keyword_end } # "Rule" class: keyword_class { $1 } tSTRING { $2 + $3 } keyword_end { $class = $1 + $keyword_end }
# "Position in grammar" $1 # "Position in grammar" $1
# "Index for yyvsp" 0 # "Index for yyvsp" 0
# "$:n" $:1
def reference_to_c(ref) def reference_to_c(ref)
case case
when ref.type == :dollar && ref.name == "$" # $$ when ref.type == :dollar && ref.name == "$" # $$
@ -39,6 +44,8 @@ module Lrama
"(yyval.#{tag.member})" "(yyval.#{tag.member})"
when ref.type == :at && ref.name == "$" # @$ when ref.type == :at && ref.name == "$" # @$
"(yyloc)" "(yyloc)"
when ref.type == :index && ref.name == "$" # $:$
raise "$:$ is not supported"
when ref.type == :dollar # $n when ref.type == :dollar # $n
i = -position_in_rhs + ref.index i = -position_in_rhs + ref.index
tag = ref.ex_tag || rhs[ref.index - 1].tag tag = ref.ex_tag || rhs[ref.index - 1].tag
@ -47,6 +54,9 @@ module Lrama
when ref.type == :at # @n when ref.type == :at # @n
i = -position_in_rhs + ref.index i = -position_in_rhs + ref.index
"(yylsp[#{i}])" "(yylsp[#{i}])"
when ref.type == :index # $:n
i = -position_in_rhs + ref.index
"(#{i} - 1)"
else else
raise "Unexpected. #{self}, #{ref}" raise "Unexpected. #{self}, #{ref}"
end end
@ -70,7 +80,7 @@ module Lrama
end end
def raise_tag_not_found_error(ref) def raise_tag_not_found_error(ref)
raise "Tag is not specified for '$#{ref.value}' in '#{@rule.to_s}'" raise "Tag is not specified for '$#{ref.value}' in '#{@rule}'"
end end
end end
end end

View File

@ -2,11 +2,12 @@ module Lrama
class Grammar class Grammar
# type: :dollar or :at # type: :dollar or :at
# name: String (e.g. $$, $foo, $expr.right) # name: String (e.g. $$, $foo, $expr.right)
# index: Integer (e.g. $1) # number: Integer (e.g. $1)
# index: Integer
# ex_tag: "$<tag>1" (Optional) # ex_tag: "$<tag>1" (Optional)
class Reference < Struct.new(:type, :name, :index, :ex_tag, :first_column, :last_column, keyword_init: true) class Reference < Struct.new(:type, :name, :number, :index, :ex_tag, :first_column, :last_column, keyword_init: true)
def value def value
name || index name || number
end end
end end
end end

View File

@ -181,11 +181,18 @@ module Lrama
if referring_symbol[1] == 0 # Refers to LHS if referring_symbol[1] == 0 # Refers to LHS
ref.name = '$' ref.name = '$'
else else
ref.index = referring_symbol[1] ref.number = referring_symbol[1]
end end
end end
end end
if ref.number
# TODO: When Inlining is implemented, for example, if `$1` is expanded to multiple RHS tokens,
# `$2` needs to access `$2 + n` to actually access it. So, after the Inlining implementation,
# it needs resolves from number to index.
ref.index = ref.number
end
# TODO: Need to check index of @ too? # TODO: Need to check index of @ too?
next if ref.type == :at next if ref.type == :at

View File

@ -11,7 +11,7 @@ module Lrama
attr_reader :term attr_reader :term
attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
def initialize(id:, alias_name: nil, number: nil, tag: nil, term:, token_id: nil, nullable: nil, precedence: nil, printer: nil) def initialize(id:, term:, alias_name: nil, number: nil, tag: nil, token_id: nil, nullable: nil, precedence: nil, printer: nil)
@id = id @id = id
@alias_name = alias_name @alias_name = alias_name
@number = number @number = number

View File

@ -0,0 +1 @@
require_relative "symbols/resolver"

View File

@ -0,0 +1,276 @@
module Lrama
  class Grammar
    class Symbols
      # Keeps the terminal and nonterminal symbols of a grammar and provides
      # lookup, numbering, attribute filling and validation for them.
      #
      # Terms and nterms are stored in separate arrays; #symbols is a cached
      # concatenation (terms first) that is dropped whenever a symbol is added.
      class Resolver
        # token_id for character literals that are matched verbatim
        # (the key is the literal with the surrounding quotes removed).
        ESCAPED_CHAR_TOKEN_IDS = {
          "\\b" => 8,
          "\\f" => 12,
          "\\n" => 10,
          "\\r" => 13,
          "\\t" => 9,
          "\\v" => 11,
          "\"" => 34,
          "'" => 39,
          "\\\\" => 92
        }.freeze

        attr_reader :terms, :nterms

        def initialize
          @terms = []
          @nterms = []
        end

        # All symbols, terms followed by nterms. The array is memoized until
        # the next #add_term / #add_nterm creates a new symbol.
        def symbols
          @symbols ||= (@terms + @nterms)
        end

        # Sorts the memoized symbol array in place by symbol number.
        def sort_by_number!
          symbols.sort_by!(&:number)
        end

        # Registers a terminal symbol and returns it.
        #
        # When a symbol with the same token_id already exists it is returned
        # (after overwriting id, alias_name and tag if +replace+ is true).
        # Otherwise, when a symbol with the same id (or an alias equal to the
        # id's s_value) exists, that symbol is returned unchanged.
        def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
          if token_id && (sym = find_symbol_by_token_id(token_id))
            if replace
              sym.id = id
              sym.alias_name = alias_name
              sym.tag = tag
            end

            return sym
          end

          if (sym = find_symbol_by_id(id))
            return sym
          end

          # Drop the cache so #symbols picks up the new term
          @symbols = nil
          term = Symbol.new(
            id: id, alias_name: alias_name, number: nil, tag: tag,
            term: true, token_id: token_id, nullable: false
          )
          @terms << term
          term
        end

        # Registers a nonterminal symbol and returns it.
        # Returns nil (and registers nothing) when a symbol with the same id
        # or alias already exists.
        def add_nterm(id:, alias_name: nil, tag: nil)
          return if find_symbol_by_id(id)

          # Drop the cache so #symbols picks up the new nterm
          @symbols = nil
          nterm = Symbol.new(
            id: id, alias_name: alias_name, number: nil, tag: tag,
            term: false, token_id: nil, nullable: nil,
          )
          @nterms << nterm
          nterm
        end

        # Returns the first symbol whose id has the given s_value, or nil.
        def find_symbol_by_s_value(s_value)
          symbols.find { |s| s.id.s_value == s_value }
        end

        # Same as #find_symbol_by_s_value but raises when nothing is found.
        def find_symbol_by_s_value!(s_value)
          find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
        end

        # Returns the first symbol whose id equals +id+ or whose alias_name
        # equals +id.s_value+, or nil.
        def find_symbol_by_id(id)
          symbols.find do |s|
            s.id == id || s.alias_name == id.s_value
          end
        end

        # Same as #find_symbol_by_id but raises when nothing is found.
        def find_symbol_by_id!(id)
          find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
        end

        # Returns the first symbol with the given token_id, or nil.
        def find_symbol_by_token_id(token_id)
          symbols.find {|s| s.token_id == token_id }
        end

        # Looks a symbol up by position in #symbols and checks that its number
        # equals that position. Raises when the slot is empty or the number
        # does not match.
        def find_symbol_by_number!(number)
          sym = symbols[number]

          raise "Symbol not found: #{number}" unless sym
          raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number

          sym
        end

        # Fills #number and #token_id of every symbol that lacks them,
        # terms first and then nterms.
        def fill_symbol_number
          # YYEMPTY = -2
          # YYEOF   =  0
          # YYerror =  1
          # YYUNDEF =  2
          @number = 3
          fill_terms_number
          fill_nterms_number
        end

        # Sets the tag of each nterm named by +types+ (objects responding to
        # #id and #tag). Raises when a type names an unknown nterm.
        def fill_nterm_type(types)
          types.each do |type|
            nterm = find_nterm_by_id!(type.id)
            nterm.tag = type.tag
          end
        end

        # Assigns each printer to the symbols matching one of its
        # ident_or_tags. When several printers match, the last one wins.
        def fill_printer(printers)
          each_ident_or_tag_match(printers) do |sym, printer|
            sym.printer = printer
          end
        end

        # Assigns each error token to the symbols matching one of its
        # ident_or_tags. When several error tokens match, the last one wins.
        def fill_error_token(error_tokens)
          each_ident_or_tag_match(error_tokens) do |sym, token|
            sym.error_token = token
          end
        end

        # Resolves a lexer token to its symbol. Raises for any other class
        # and for tokens that match no symbol.
        def token_to_symbol(token)
          case token
          when Lrama::Lexer::Token
            find_symbol_by_id!(token)
          else
            raise "Unknown class: #{token}"
          end
        end

        # Raises when two symbols share a number or an alias name.
        def validate!
          validate_number_uniqueness!
          validate_alias_name_uniqueness!
        end

        private

        def find_nterm_by_id!(id)
          @nterms.find do |s|
            s.id == id
          end || (raise "Symbol not found: #{id}")
        end

        # Yields [symbol, entry] for every symbol/entry pair where one of the
        # entry's ident_or_tags is an Ident equal to the symbol id or a Tag
        # equal to the symbol tag. Iteration order is symbols, then entries,
        # then ident_or_tags, so later entries are yielded later.
        def each_ident_or_tag_match(entries)
          symbols.each do |sym|
            entries.each do |entry|
              entry.ident_or_tags.each do |ident_or_tag|
                case ident_or_tag
                when Lrama::Lexer::Token::Ident
                  yield sym, entry if sym.id == ident_or_tag
                when Lrama::Lexer::Token::Tag
                  yield sym, entry if sym.tag == ident_or_tag
                else
                  raise "Unknown token type. #{entry}"
                end
              end
            end
          end
        end

        # Advances @number past numbers already in use, then gives the next
        # free number to +sym+ if it has none yet.
        def assign_number(sym)
          @number += 1 while used_numbers[@number]
          return unless sym.number.nil?

          sym.number = @number
          used_numbers[@number] = true
          @number += 1
        end

        def fill_terms_number
          # Character literal in grammar file has
          # token id corresponding to ASCII code by default,
          # so start token_id from 256.
          token_id = 256

          @terms.each do |sym|
            assign_number(sym)
            next unless sym.token_id.nil?

            # If id is Token::Char, it uses ASCII code
            if sym.id.is_a?(Lrama::Lexer::Token::Char)
              sym.token_id = char_token_id(sym)
            else
              sym.token_id = token_id
              token_id += 1
            end
          end
        end

        # Computes the token_id of a character-literal term.
        # Raises "Unknown Char s_value ..." for anything that is not a known
        # escape, an octal escape or a single character.
        def char_token_id(sym)
          # Ignore ' on the both sides
          body = sym.id.s_value[1..-2]
          known = ESCAPED_CHAR_TOKEN_IDS[body]
          return known if known

          case body
          when /\A\\([0-7]+)\z/
            # Only octal digits are accepted here so that input such as '\8'
            # reaches the descriptive error below instead of making
            # Integer() raise ArgumentError.
            Integer($1, 8)
          when /\A(.)\z/
            $1&.bytes&.first || (raise "Unknown Char s_value #{sym}")
          else
            raise "Unknown Char s_value #{sym}"
          end
        end

        def fill_nterms_number
          token_id = 0

          @nterms.each do |sym|
            assign_number(sym)
            next unless sym.token_id.nil?

            sym.token_id = token_id
            token_id += 1
          end
        end

        # Numbers taken by symbols when first asked, built once and then
        # extended by #assign_number.
        def used_numbers
          @used_numbers ||= symbols.each_with_object({}) do |sym, used|
            used[sym.number] = true
          end
        end

        def validate_number_uniqueness!
          invalid = symbols.group_by(&:number).select do |_number, syms|
            syms.count > 1
          end

          return if invalid.empty?

          raise "Symbol number is duplicated. #{invalid}"
        end

        def validate_alias_name_uniqueness!
          invalid = symbols.select(&:alias_name).group_by(&:alias_name).select do |_alias_name, syms|
            syms.count > 1
          end

          return if invalid.empty?

          raise "Symbol alias name is duplicated. #{invalid}"
        end
      end
    end
  end
end

View File

@ -1,4 +1,5 @@
require "strscan" require "strscan"
require "lrama/lexer/grammar_file" require "lrama/lexer/grammar_file"
require "lrama/lexer/location" require "lrama/lexer/location"
require "lrama/lexer/token" require "lrama/lexer/token"
@ -26,6 +27,11 @@ module Lrama
%precedence %precedence
%prec %prec
%error-token %error-token
%before-reduce
%after-reduce
%after-shift-error-token
%after-shift
%after-pop-stack
%empty %empty
%code %code
%rule %rule

View File

@ -38,7 +38,7 @@ module Lrama
return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos) return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos)
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1 when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos) return Lrama::Grammar::Reference.new(type: :dollar, number: Integer(scanner[2]), index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos)
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets) when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos) return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
@ -51,11 +51,22 @@ module Lrama
when scanner.scan(/@\$/) # @$ when scanner.scan(/@\$/) # @$
return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos) return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos)
when scanner.scan(/@(\d+)/) # @1 when scanner.scan(/@(\d+)/) # @1
return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos) return Lrama::Grammar::Reference.new(type: :at, number: Integer(scanner[1]), index: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets) when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos) return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets) when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos) return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
# $: references
when scanner.scan(/\$:\$/) # $:$
return Lrama::Grammar::Reference.new(type: :index, name: "$", first_column: start, last_column: scanner.pos)
when scanner.scan(/\$:(\d+)/) # $:1
return Lrama::Grammar::Reference.new(type: :index, number: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
when scanner.scan(/\$:([a-zA-Z_][a-zA-Z0-9_]*)/) # $:foo, $:expr (named reference without brackets)
return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos)
when scanner.scan(/\$:\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $:[expr.right], $:[expr-right] (named reference with brackets)
return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos)
end end
end end
end end

View File

@ -16,8 +16,7 @@ module Lrama
def initialize( def initialize(
out:, output_file_path:, template_name:, grammar_file_path:, out:, output_file_path:, template_name:, grammar_file_path:,
header_out: nil, header_file_path: nil, context:, grammar:, header_out: nil, header_file_path: nil, error_recovery: false
context:, grammar:, error_recovery: false
) )
@out = out @out = out
@output_file_path = output_file_path @output_file_path = output_file_path
@ -162,6 +161,61 @@ module Lrama
STR STR
end end
# Builds the C snippet that calls the function registered in
# @grammar.after_shift, passing parse_param_name as its only argument.
# Returns "" when no such function is registered.
# The snippet starts with +comment+ and is wrapped in #line directives: the
# first points at the registration's line in the grammar file, the second
# uses the [@oline@] / [@ofile@] markers (presumably substituted with the
# output position later - confirm against the template renderer).
def after_shift_function(comment = "")
return "" unless @grammar.after_shift
<<-STR
#{comment}
#line #{@grammar.after_shift.line} "#{@grammar_file_path}"
{#{@grammar.after_shift.s_value}(#{parse_param_name});}
#line [@oline@] [@ofile@]
STR
end
# Builds the C snippet that calls the function registered in
# @grammar.before_reduce with `yylen` followed by user_args.
# Returns "" when no such function is registered.
# The snippet starts with +comment+ and is wrapped in #line directives: the
# first points at the registration's line in the grammar file, the second
# uses the [@oline@] / [@ofile@] markers (presumably substituted with the
# output position later - confirm against the template renderer).
def before_reduce_function(comment = "")
return "" unless @grammar.before_reduce
<<-STR
#{comment}
#line #{@grammar.before_reduce.line} "#{@grammar_file_path}"
{#{@grammar.before_reduce.s_value}(yylen#{user_args});}
#line [@oline@] [@ofile@]
STR
end
# Builds the C snippet that calls the function registered in
# @grammar.after_reduce with `yylen` followed by user_args.
# Returns "" when no such function is registered.
# The snippet starts with +comment+ and is wrapped in #line directives: the
# first points at the registration's line in the grammar file, the second
# uses the [@oline@] / [@ofile@] markers (presumably substituted with the
# output position later - confirm against the template renderer).
def after_reduce_function(comment = "")
return "" unless @grammar.after_reduce
<<-STR
#{comment}
#line #{@grammar.after_reduce.line} "#{@grammar_file_path}"
{#{@grammar.after_reduce.s_value}(yylen#{user_args});}
#line [@oline@] [@ofile@]
STR
end
# Builds the C snippet that calls the function registered in
# @grammar.after_shift_error_token, passing parse_param_name as its only
# argument. Returns "" when no such function is registered.
# The snippet starts with +comment+ and is wrapped in #line directives: the
# first points at the registration's line in the grammar file, the second
# uses the [@oline@] / [@ofile@] markers (presumably substituted with the
# output position later - confirm against the template renderer).
def after_shift_error_token_function(comment = "")
return "" unless @grammar.after_shift_error_token
<<-STR
#{comment}
#line #{@grammar.after_shift_error_token.line} "#{@grammar_file_path}"
{#{@grammar.after_shift_error_token.s_value}(#{parse_param_name});}
#line [@oline@] [@ofile@]
STR
end
# Builds the C snippet that calls the function registered in
# @grammar.after_pop_stack with +len+ followed by user_args.
# +len+ is interpolated verbatim into the C call, so it may be a C
# expression such as "yylen" or a literal such as 1.
# Returns "" when no such function is registered.
# The snippet starts with +comment+ and is wrapped in #line directives: the
# first points at the registration's line in the grammar file, the second
# uses the [@oline@] / [@ofile@] markers (presumably substituted with the
# output position later - confirm against the template renderer).
def after_pop_stack_function(len, comment = "")
return "" unless @grammar.after_pop_stack
<<-STR
#{comment}
#line #{@grammar.after_pop_stack.line} "#{@grammar_file_path}"
{#{@grammar.after_pop_stack.s_value}(#{len}#{user_args});}
#line [@oline@] [@ofile@]
STR
end
def symbol_actions_for_error_token def symbol_actions_for_error_token
str = "" str = ""

File diff suppressed because it is too large Load Diff

View File

@ -29,8 +29,8 @@ module Lrama
end end
def non_default_reduces def non_default_reduces
reduces.select do |reduce| reduces.reject do |reduce|
reduce.rule != @default_reduction_rule reduce.rule == @default_reduction_rule
end end
end end
@ -105,8 +105,8 @@ module Lrama
end end
def selected_term_transitions def selected_term_transitions
term_transitions.select do |shift, next_state| term_transitions.reject do |shift, next_state|
!shift.not_selected shift.not_selected
end end
end end

View File

@ -1,8 +1,14 @@
# TODO: Validate position is not over rule rhs # TODO: Validate position is not over rule rhs
require "forwardable"
module Lrama module Lrama
class States class States
class Item < Struct.new(:rule, :position, keyword_init: true) class Item < Struct.new(:rule, :position, keyword_init: true)
extend Forwardable
def_delegators "rule", :lhs, :rhs
# Optimization for States#setup_state # Optimization for States#setup_state
def hash def hash
[rule_id, position].hash [rule_id, position].hash
@ -20,14 +26,6 @@ module Lrama
rhs.count - position rhs.count - position
end end
def lhs
rule.lhs
end
def rhs
rule.rhs
end
def next_sym def next_sym
rhs[position] rhs[position]
end end

View File

@ -109,8 +109,8 @@ module Lrama
io << "\n" io << "\n"
# Report shifts # Report shifts
tmp = state.term_transitions.select do |shift, _| tmp = state.term_transitions.reject do |shift, _|
!shift.not_selected shift.not_selected
end.map do |shift, next_state| end.map do |shift, next_state|
[shift.next_sym, next_state.id] [shift.next_sym, next_state.id]
end end

View File

@ -1,3 +1,3 @@
module Lrama module Lrama
VERSION = "0.6.2".freeze VERSION = "0.6.3".freeze
end end

View File

@ -1752,6 +1752,7 @@ yybackup:
*++yyvsp = yylval; *++yyvsp = yylval;
YY_IGNORE_MAYBE_UNINITIALIZED_END YY_IGNORE_MAYBE_UNINITIALIZED_END
*++yylsp = yylloc; *++yylsp = yylloc;
<%= output.after_shift_function("/* %after-shift code. */") %>
/* Discard the shifted token. */ /* Discard the shifted token. */
yychar = YYEMPTY; yychar = YYEMPTY;
@ -1784,6 +1785,7 @@ yyreduce:
unconditionally makes the parser a bit smaller, and it avoids a unconditionally makes the parser a bit smaller, and it avoids a
GCC warning that YYVAL may be used uninitialized. */ GCC warning that YYVAL may be used uninitialized. */
yyval = yyvsp[1-yylen]; yyval = yyvsp[1-yylen];
<%= output.before_reduce_function("/* %before-reduce function. */") %>
/* Default location. */ /* Default location. */
YYLLOC_DEFAULT (yyloc, (yylsp - yylen), yylen); YYLLOC_DEFAULT (yyloc, (yylsp - yylen), yylen);
@ -1809,6 +1811,7 @@ yyreduce:
YY_SYMBOL_PRINT ("-> $$ =", YY_CAST (yysymbol_kind_t, yyr1[yyn]), &yyval, &yyloc<%= output.user_args %>); YY_SYMBOL_PRINT ("-> $$ =", YY_CAST (yysymbol_kind_t, yyr1[yyn]), &yyval, &yyloc<%= output.user_args %>);
YYPOPSTACK (yylen); YYPOPSTACK (yylen);
<%= output.after_reduce_function("/* %after-reduce function. */") %>
yylen = 0; yylen = 0;
*++yyvsp = yyval; *++yyvsp = yyval;
@ -1910,6 +1913,7 @@ yyerrorlab:
/* Do not reclaim the symbols of the rule whose action triggered /* Do not reclaim the symbols of the rule whose action triggered
this YYERROR. */ this YYERROR. */
YYPOPSTACK (yylen); YYPOPSTACK (yylen);
<%= output.after_pop_stack_function("yylen", "/* %after-pop-stack function. */") %>
yylen = 0; yylen = 0;
YY_STACK_PRINT (yyss, yyssp<%= output.user_args %>); YY_STACK_PRINT (yyss, yyssp<%= output.user_args %>);
yystate = *yyssp; yystate = *yyssp;
@ -1969,6 +1973,7 @@ yyerrlab1:
yydestruct ("Error: popping", yydestruct ("Error: popping",
YY_ACCESSING_SYMBOL (yystate), yyvsp, yylsp<%= output.user_args %>); YY_ACCESSING_SYMBOL (yystate), yyvsp, yylsp<%= output.user_args %>);
YYPOPSTACK (1); YYPOPSTACK (1);
<%= output.after_pop_stack_function(1, "/* %after-pop-stack function. */") %>
yystate = *yyssp; yystate = *yyssp;
YY_STACK_PRINT (yyss, yyssp<%= output.user_args %>); YY_STACK_PRINT (yyss, yyssp<%= output.user_args %>);
} }
@ -1983,6 +1988,7 @@ yyerrlab1:
/* Shift the error token. */ /* Shift the error token. */
YY_SYMBOL_PRINT ("Shifting", YY_ACCESSING_SYMBOL (yyn), yyvsp, yylsp<%= output.user_args %>); YY_SYMBOL_PRINT ("Shifting", YY_ACCESSING_SYMBOL (yyn), yyvsp, yylsp<%= output.user_args %>);
<%= output.after_shift_error_token_function("/* %after-shift-error-token code. */") %>
yystate = yyn; yystate = yyn;
goto yynewstate; goto yynewstate;