Lrama v0.6.3

This commit is contained in:
yui-knk 2024-02-14 21:55:30 +09:00 committed by Yuichiro Kaneko
parent 95ec71efc5
commit 06ad00adc2
21 changed files with 1030 additions and 766 deletions

View File

@ -1,5 +1,39 @@
# NEWS for Lrama
## Lrama 0.6.3 (2024-02-15)
### Bring Your Own Stack
Provide functionalities for Bring Your Own Stack.
Ruby's Ripper library requires its own semantic value stack to manage the Ruby objects returned by user-defined callback methods. Currently Ripper uses the semantic value stack (`yyvsa`) that the parser uses to manage Nodes. This hack imposes some limitations on Ripper. For example, Ripper cannot execute semantic analysis that depends on the Node structure.
Lrama introduces two features that let parser generator users maintain another semantic value stack of their own.
1. Callback entry points
Users can emulate a semantic value stack with these callbacks.
Lrama provides the following five callbacks. Each registered function is called when the corresponding event happens. For example, the `%after-shift` function is called when a shift happens on the original semantic value stack.
* `%after-shift` function_name
* `%before-reduce` function_name
* `%after-reduce` function_name
* `%after-shift-error-token` function_name
* `%after-pop-stack` function_name
2. `$:n` variable to access the index of each grammar symbol
Users also need to access the semantic values on their own stack in grammar actions. `$:n` provides a way to do that. `$:n` is translated to a negative index from the top of the stack.
For example
```
primary: k_if expr_value then compstmt if_tail k_end
{
/*% ripper: if!($:2, $:4, $:5) %*/
/* $:2 = -5, $:4 = -3, $:5 = -2. */
}
```
## Lrama 0.6.2 (2024-01-27)
### %no-stdlib directive

View File

@ -265,9 +265,9 @@ module Lrama
s = actions.each_with_index.map do |n, i|
[i, n]
end.select do |i, n|
end.reject do |i, n|
# Remove default_reduction_rule entries
n != 0
n == 0
end
if s.count != 0
@ -462,7 +462,7 @@ module Lrama
@yylast = high
# replace_ninf
@yypact_ninf = (@base.select {|i| i != BaseMin } + [0]).min - 1
@yypact_ninf = (@base.reject {|i| i == BaseMin } + [0]).min - 1
@base.map! do |i|
case i
when BaseMin
@ -472,7 +472,7 @@ module Lrama
end
end
@yytable_ninf = (@table.compact.select {|i| i != ErrorActionNumber } + [0]).min - 1
@yytable_ninf = (@table.compact.reject {|i| i == ErrorActionNumber } + [0]).min - 1
@table.map! do |i|
case i
when nil

View File

@ -1,16 +1,18 @@
require "forwardable"
require "lrama/grammar/auxiliary"
require "lrama/grammar/binding"
require "lrama/grammar/code"
require "lrama/grammar/counter"
require "lrama/grammar/error_token"
require "lrama/grammar/parameterizing_rule"
require "lrama/grammar/percent_code"
require "lrama/grammar/precedence"
require "lrama/grammar/printer"
require "lrama/grammar/reference"
require "lrama/grammar/rule"
require "lrama/grammar/rule_builder"
require "lrama/grammar/parameterizing_rule"
require "lrama/grammar/symbol"
require "lrama/grammar/symbols"
require "lrama/grammar/type"
require "lrama/grammar/union"
require "lrama/lexer"
@ -18,14 +20,23 @@ require "lrama/lexer"
module Lrama
# Grammar is the result of parsing an input grammar file
class Grammar
extend Forwardable
attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
attr_accessor :union, :expect,
:printers, :error_tokens,
:lex_param, :parse_param, :initial_action,
:symbols, :types,
:after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack,
:symbols_resolver, :types,
:rules, :rule_builders,
:sym_to_rules, :no_stdlib
def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term,
:find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol,
:find_symbol_by_s_value!, :fill_symbol_number, :fill_nterm_type,
:fill_printer, :fill_error_token, :sort_by_number!
def initialize(rule_counter)
@rule_counter = rule_counter
@ -33,7 +44,7 @@ module Lrama
@percent_codes = []
@printers = []
@error_tokens = []
@symbols = []
@symbols_resolver = Grammar::Symbols::Resolver.new
@types = []
@rule_builders = []
@rules = []
@ -62,44 +73,6 @@ module Lrama
@error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, token_code: token_code, lineno: lineno)
end
def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
if token_id && (sym = @symbols.find {|s| s.token_id == token_id })
if replace
sym.id = id
sym.alias_name = alias_name
sym.tag = tag
end
return sym
end
if (sym = @symbols.find {|s| s.id == id })
return sym
end
sym = Symbol.new(
id: id, alias_name: alias_name, number: nil, tag: tag,
term: true, token_id: token_id, nullable: false
)
@symbols << sym
@terms = nil
return sym
end
def add_nterm(id:, alias_name: nil, tag: nil)
return if @symbols.find {|s| s.id == id }
sym = Symbol.new(
id: id, alias_name: alias_name, number: nil, tag: tag,
term: false, token_id: nil, nullable: nil,
)
@symbols << sym
@nterms = nil
return sym
end
def add_type(id:, tag:)
@types << Type.new(id: id, tag: tag)
end
@ -165,13 +138,9 @@ module Lrama
normalize_rules
collect_symbols
set_lhs_and_rhs
fill_symbol_number
fill_default_precedence
fill_symbols
fill_sym_to_rules
fill_nterm_type
fill_symbol_printer
fill_symbol_error_token
@symbols.sort_by!(&:number)
compute_nullable
compute_first_set
end
@ -180,40 +149,10 @@ module Lrama
#
# * Validation for no_declared_type_reference
def validate!
validate_symbol_number_uniqueness!
validate_symbol_alias_name_uniqueness!
@symbols_resolver.validate!
validate_rule_lhs_is_nterm!
end
def find_symbol_by_s_value(s_value)
@symbols.find do |sym|
sym.id.s_value == s_value
end
end
def find_symbol_by_s_value!(s_value)
find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
end
def find_symbol_by_id(id)
@symbols.find do |sym|
sym.id == id || sym.alias_name == id.s_value
end
end
def find_symbol_by_id!(id)
find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
end
def find_symbol_by_number!(number)
sym = @symbols[number]
raise "Symbol not found: #{number}" unless sym
raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number
sym
end
def find_rules_by_symbol!(sym)
find_rules_by_symbol(sym) || (raise "Rules for #{sym} not found")
end
@ -222,22 +161,6 @@ module Lrama
@sym_to_rules[sym.number]
end
def terms_count
terms.count
end
def terms
@terms ||= @symbols.select(&:term?)
end
def nterms_count
nterms.count
end
def nterms
@nterms ||= @symbols.select(&:nterm?)
end
private
def compute_nullable
@ -284,7 +207,7 @@ module Lrama
rule.nullable = false
end
nterms.select {|r| r.nullable.nil? }.each do |nterm|
nterms.select {|e| e.nullable.nil? }.each do |nterm|
nterm.nullable = false
end
end
@ -330,12 +253,6 @@ module Lrama
end
end
def find_nterm_by_id!(id)
nterms.find do |nterm|
nterm.id == id
end || (raise "Nterm not found: #{id}")
end
def append_special_symbols
# YYEMPTY (token_id: -2, number: -2) is added when a template is evaluated
# term = add_term(id: Token.new(Token::Ident, "YYEMPTY"), token_id: -2)
@ -397,79 +314,6 @@ module Lrama
end
end
# Fill #number and #token_id
def fill_symbol_number
# Character literal in grammar file has
# token id corresponding to ASCII code by default,
# so start token_id from 256.
token_id = 256
# YYEMPTY = -2
# YYEOF = 0
# YYerror = 1
# YYUNDEF = 2
number = 3
nterm_token_id = 0
used_numbers = {}
@symbols.map(&:number).each do |n|
used_numbers[n] = true
end
(@symbols.select(&:term?) + @symbols.select(&:nterm?)).each do |sym|
while used_numbers[number] do
number += 1
end
if sym.number.nil?
sym.number = number
number += 1
end
# If id is Token::Char, it uses ASCII code
if sym.term? && sym.token_id.nil?
if sym.id.is_a?(Lrama::Lexer::Token::Char)
# Ignore ' on the both sides
case sym.id.s_value[1..-2]
when "\\b"
sym.token_id = 8
when "\\f"
sym.token_id = 12
when "\\n"
sym.token_id = 10
when "\\r"
sym.token_id = 13
when "\\t"
sym.token_id = 9
when "\\v"
sym.token_id = 11
when "\""
sym.token_id = 34
when "'"
sym.token_id = 39
when "\\\\"
sym.token_id = 92
when /\A\\(\d+)\z/
sym.token_id = Integer($1, 8)
when /\A(.)\z/
sym.token_id = $1.bytes.first
else
raise "Unknown Char s_value #{sym}"
end
else
sym.token_id = token_id
token_id += 1
end
end
if sym.nterm? && sym.token_id.nil?
sym.token_id = nterm_token_id
nterm_token_id += 1
end
end
end
def set_lhs_and_rhs
@rules.each do |rule|
rule.lhs = token_to_symbol(rule._lhs) if rule._lhs
@ -480,15 +324,6 @@ module Lrama
end
end
def token_to_symbol(token)
case token
when Lrama::Lexer::Token
find_symbol_by_id!(token)
else
raise "Unknown class: #{token}"
end
end
# Rule inherits precedence from the last term in RHS.
#
# https://www.gnu.org/software/bison/manual/html_node/How-Precedence.html
@ -506,6 +341,14 @@ module Lrama
end
end
def fill_symbols
fill_symbol_number
fill_nterm_type(@types)
fill_printer(@printers)
fill_error_token(@error_tokens)
sort_by_number!
end
def fill_sym_to_rules
@rules.each do |rule|
key = rule.lhs.number
@ -514,68 +357,6 @@ module Lrama
end
end
# Fill nterm's tag defined by %type decl
def fill_nterm_type
@types.each do |type|
nterm = find_nterm_by_id!(type.id)
nterm.tag = type.tag
end
end
def fill_symbol_printer
@symbols.each do |sym|
@printers.each do |printer|
printer.ident_or_tags.each do |ident_or_tag|
case ident_or_tag
when Lrama::Lexer::Token::Ident
sym.printer = printer if sym.id == ident_or_tag
when Lrama::Lexer::Token::Tag
sym.printer = printer if sym.tag == ident_or_tag
else
raise "Unknown token type. #{printer}"
end
end
end
end
end
def fill_symbol_error_token
@symbols.each do |sym|
@error_tokens.each do |error_token|
error_token.ident_or_tags.each do |ident_or_tag|
case ident_or_tag
when Lrama::Lexer::Token::Ident
sym.error_token = error_token if sym.id == ident_or_tag
when Lrama::Lexer::Token::Tag
sym.error_token = error_token if sym.tag == ident_or_tag
else
raise "Unknown token type. #{error_token}"
end
end
end
end
end
def validate_symbol_number_uniqueness!
invalid = @symbols.group_by(&:number).select do |number, syms|
syms.count > 1
end
return if invalid.empty?
raise "Symbol number is duplicated. #{invalid}"
end
def validate_symbol_alias_name_uniqueness!
invalid = @symbols.select(&:alias_name).group_by(&:alias_name).select do |alias_name, syms|
syms.count > 1
end
return if invalid.empty?
raise "Symbol alias name is duplicated. #{invalid}"
end
def validate_rule_lhs_is_nterm!
errors = []

View File

@ -6,18 +6,24 @@ module Lrama
# * ($$) yylval
# * (@$) yylloc
# * ($:$) error
# * ($1) error
# * (@1) error
# * ($:1) error
def reference_to_c(ref)
case
when ref.type == :dollar && ref.name == "$" # $$
"yylval"
when ref.type == :at && ref.name == "$" # @$
"yylloc"
when ref.type == :index && ref.name == "$" # $:$
raise "$:#{ref.value} can not be used in initial_action."
when ref.type == :dollar # $n
raise "$#{ref.value} can not be used in initial_action."
when ref.type == :at # @n
raise "@#{ref.value} can not be used in initial_action."
when ref.type == :index # $:n
raise "$:#{ref.value} can not be used in initial_action."
else
raise "Unexpected. #{self}, #{ref}"
end

View File

@ -6,14 +6,18 @@ module Lrama
# * ($$) error
# * (@$) error
# * ($:$) error
# * ($1) error
# * (@1) error
# * ($:1) error
def reference_to_c(ref)
case
when ref.type == :dollar # $$, $n
raise "$#{ref.value} can not be used in #{type}."
when ref.type == :at # @$, @n
raise "@#{ref.value} can not be used in #{type}."
when ref.type == :index # $:$, $:n
raise "$:#{ref.value} can not be used in #{type}."
else
raise "Unexpected. #{self}, #{ref}"
end

View File

@ -11,8 +11,10 @@ module Lrama
# * ($$) *yyvaluep
# * (@$) *yylocationp
# * ($:$) error
# * ($1) error
# * (@1) error
# * ($:1) error
def reference_to_c(ref)
case
when ref.type == :dollar && ref.name == "$" # $$
@ -20,10 +22,14 @@ module Lrama
"((*yyvaluep).#{member})"
when ref.type == :at && ref.name == "$" # @$
"(*yylocationp)"
when ref.type == :index && ref.name == "$" # $:$
raise "$:#{ref.value} can not be used in #{type}."
when ref.type == :dollar # $n
raise "$#{ref.value} can not be used in #{type}."
when ref.type == :at # @n
raise "@#{ref.value} can not be used in #{type}."
when ref.type == :index # $:n
raise "$:#{ref.value} can not be used in #{type}."
else
raise "Unexpected. #{self}, #{ref}"
end

View File

@ -11,8 +11,10 @@ module Lrama
# * ($$) yyval
# * (@$) yyloc
# * ($:$) error
# * ($1) yyvsp[i]
# * (@1) yylsp[i]
# * ($:1) i - 1
#
#
# Consider a rule like
@ -24,6 +26,8 @@ module Lrama
# "Rule" class: keyword_class { $1 } tSTRING { $2 + $3 } keyword_end { $class = $1 + $keyword_end }
# "Position in grammar" $1 $2 $3 $4 $5
# "Index for yyvsp" -4 -3 -2 -1 0
# "$:n" $:1 $:2 $:3 $:4 $:5
# "index of $:n" -5 -4 -3 -2 -1
#
#
# For the first midrule action:
@ -31,6 +35,7 @@ module Lrama
# "Rule" class: keyword_class { $1 } tSTRING { $2 + $3 } keyword_end { $class = $1 + $keyword_end }
# "Position in grammar" $1
# "Index for yyvsp" 0
# "$:n" $:1
def reference_to_c(ref)
case
when ref.type == :dollar && ref.name == "$" # $$
@ -39,6 +44,8 @@ module Lrama
"(yyval.#{tag.member})"
when ref.type == :at && ref.name == "$" # @$
"(yyloc)"
when ref.type == :index && ref.name == "$" # $:$
raise "$:$ is not supported"
when ref.type == :dollar # $n
i = -position_in_rhs + ref.index
tag = ref.ex_tag || rhs[ref.index - 1].tag
@ -47,6 +54,9 @@ module Lrama
when ref.type == :at # @n
i = -position_in_rhs + ref.index
"(yylsp[#{i}])"
when ref.type == :index # $:n
i = -position_in_rhs + ref.index
"(#{i} - 1)"
else
raise "Unexpected. #{self}, #{ref}"
end
@ -70,7 +80,7 @@ module Lrama
end
def raise_tag_not_found_error(ref)
raise "Tag is not specified for '$#{ref.value}' in '#{@rule.to_s}'"
raise "Tag is not specified for '$#{ref.value}' in '#{@rule}'"
end
end
end

View File

@ -2,11 +2,12 @@ module Lrama
class Grammar
# type: :dollar, :at or :index (:index is a `$:n` style reference)
# name: String (e.g. $$, $foo, $expr.right)
# index: Integer (e.g. $1)
# number: Integer (e.g. $1)
# index: Integer
# ex_tag: "$<tag>1" (Optional)
class Reference < Struct.new(:type, :name, :index, :ex_tag, :first_column, :last_column, keyword_init: true)
class Reference < Struct.new(:type, :name, :number, :index, :ex_tag, :first_column, :last_column, keyword_init: true)
def value
name || index
name || number
end
end
end

View File

@ -181,11 +181,18 @@ module Lrama
if referring_symbol[1] == 0 # Refers to LHS
ref.name = '$'
else
ref.index = referring_symbol[1]
ref.number = referring_symbol[1]
end
end
end
if ref.number
# TODO: Once inlining is implemented, `$1` may be expanded to multiple RHS tokens, in which case
# `$2` has to refer to position `2 + n` to reach the intended symbol. After inlining is implemented,
# `number` needs to be resolved to `index` here instead of being copied as is.
ref.index = ref.number
end
# TODO: Need to check index of @ too?
next if ref.type == :at

View File

@ -11,7 +11,7 @@ module Lrama
attr_reader :term
attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
def initialize(id:, alias_name: nil, number: nil, tag: nil, term:, token_id: nil, nullable: nil, precedence: nil, printer: nil)
def initialize(id:, term:, alias_name: nil, number: nil, tag: nil, token_id: nil, nullable: nil, precedence: nil, printer: nil)
@id = id
@alias_name = alias_name
@number = number

View File

@ -0,0 +1 @@
require_relative "symbols/resolver"

View File

@ -0,0 +1,276 @@
module Lrama
  class Grammar
    class Symbols
      # Holds the terminal and nonterminal symbols of a grammar and provides
      # lookup, numbering (#number and #token_id), decoration (tag, printer,
      # error token) and validation over them.
      class Resolver
        # Token ids of character literals that are written with a named escape
        # or a quote character. Keys are the literal body, i.e. the s_value of
        # the Char token with the surrounding single quotes removed.
        ESCAPED_CHAR_TOKEN_IDS = {
          "\\b" => 8,
          "\\f" => 12,
          "\\n" => 10,
          "\\r" => 13,
          "\\t" => 9,
          "\\v" => 11,
          "\"" => 34,
          "'" => 39,
          "\\\\" => 92,
        }.freeze

        attr_reader :terms, :nterms

        def initialize
          @terms = []
          @nterms = []
        end

        # Returns every symbol, terms first and nterms after them.
        # The array is cached until a symbol is added, so #sort_by_number!
        # reorders the cached array in place.
        def symbols
          @symbols ||= (@terms + @nterms)
        end

        # Sorts the cached #symbols array by symbol number.
        def sort_by_number!
          symbols.sort_by!(&:number)
        end

        # Registers a terminal symbol and returns it.
        #
        # When a symbol with the same token_id already exists it is returned
        # instead; if `replace` is true its id, alias_name and tag are
        # overwritten first. When a symbol with the same id (or an alias equal
        # to id.s_value) already exists, that symbol is returned unchanged.
        def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
          if token_id && (sym = find_symbol_by_token_id(token_id))
            if replace
              sym.id = id
              sym.alias_name = alias_name
              sym.tag = tag
            end

            return sym
          end

          if (sym = find_symbol_by_id(id))
            return sym
          end

          # Drop the cached list so that #symbols picks up the new term.
          @symbols = nil
          term = Symbol.new(
            id: id, alias_name: alias_name, number: nil, tag: tag,
            term: true, token_id: token_id, nullable: false
          )
          @terms << term
          term
        end

        # Registers a nonterminal symbol and returns it.
        # Returns nil, without adding anything, when a symbol with the same id
        # (or an alias equal to id.s_value) is already known.
        def add_nterm(id:, alias_name: nil, tag: nil)
          return if find_symbol_by_id(id)

          # Drop the cached list so that #symbols picks up the new nterm.
          @symbols = nil
          nterm = Symbol.new(
            id: id, alias_name: alias_name, number: nil, tag: tag,
            term: false, token_id: nil, nullable: nil,
          )
          @nterms << nterm
          nterm
        end

        # Returns the first symbol whose id has the given s_value, or nil.
        def find_symbol_by_s_value(s_value)
          symbols.find { |s| s.id.s_value == s_value }
        end

        # Same as #find_symbol_by_s_value but raises when nothing matches.
        def find_symbol_by_s_value!(s_value)
          find_symbol_by_s_value(s_value) || (raise "Symbol not found: #{s_value}")
        end

        # Returns the first symbol whose id equals `id` or whose alias_name
        # equals id.s_value, or nil.
        def find_symbol_by_id(id)
          symbols.find do |s|
            s.id == id || s.alias_name == id.s_value
          end
        end

        # Same as #find_symbol_by_id but raises when nothing matches.
        def find_symbol_by_id!(id)
          find_symbol_by_id(id) || (raise "Symbol not found: #{id}")
        end

        # Returns the first symbol with the given token_id, or nil.
        def find_symbol_by_token_id(token_id)
          symbols.find { |s| s.token_id == token_id }
        end

        # Returns the symbol stored at position `number` of #symbols.
        # Raises when that slot is empty or when the symbol found there has a
        # different number, i.e. the list is not ordered by number.
        def find_symbol_by_number!(number)
          sym = symbols[number]

          raise "Symbol not found: #{number}" unless sym
          raise "[BUG] Symbol number mismatch. #{number}, #{sym}" if sym.number != number

          sym
        end

        # Fills #number and #token_id of every symbol that lacks them.
        def fill_symbol_number
          # YYEMPTY = -2
          # YYEOF   =  0
          # YYerror =  1
          # YYUNDEF =  2
          @number = 3
          fill_terms_number
          fill_nterms_number
        end

        # Copies the tag of each type onto the nterm with the same id.
        # Raises when no such nterm exists.
        def fill_nterm_type(types)
          types.each do |type|
            nterm = find_nterm_by_id!(type.id)
            nterm.tag = type.tag
          end
        end

        # Attaches each printer to the symbols it names by id or by tag.
        def fill_printer(printers)
          each_directive_match(printers) do |sym, printer|
            sym.printer = printer
          end
        end

        # Attaches each error token to the symbols it names by id or by tag.
        def fill_error_token(error_tokens)
          each_directive_match(error_tokens) do |sym, token|
            sym.error_token = token
          end
        end

        # Resolves a lexer token to its symbol. Raises for any other object
        # and when no symbol matches the token.
        def token_to_symbol(token)
          case token
          when Lrama::Lexer::Token
            find_symbol_by_id!(token)
          else
            raise "Unknown class: #{token}"
          end
        end

        # Raises when symbol numbers or alias names are not unique.
        def validate!
          validate_number_uniqueness!
          validate_alias_name_uniqueness!
        end

        private

        def find_nterm_by_id!(id)
          @nterms.find do |s|
            s.id == id
          end || (raise "Symbol not found: #{id}")
        end

        def fill_terms_number
          # Character literal in grammar file has
          # token id corresponding to ASCII code by default,
          # so start token_id from 256.
          token_id = 256

          @terms.each do |sym|
            assign_number(sym)
            next unless sym.token_id.nil?

            if sym.id.is_a?(Lrama::Lexer::Token::Char)
              # If id is Token::Char, it uses ASCII code
              sym.token_id = char_token_id(sym)
            else
              sym.token_id = token_id
              token_id += 1
            end
          end
        end

        def fill_nterms_number
          token_id = 0

          @nterms.each do |sym|
            assign_number(sym)
            next unless sym.token_id.nil?

            sym.token_id = token_id
            token_id += 1
          end
        end

        # Advances @number past numbers that are already taken and, when the
        # symbol has no number yet, gives it the next free one.
        def assign_number(sym)
          @number += 1 while used_numbers[@number]
          return unless sym.number.nil?

          sym.number = @number
          used_numbers[@number] = true
          @number += 1
        end

        # Computes the token id of a character literal symbol.
        # Raises "Unknown Char s_value ..." for anything that is neither a
        # known escape, an octal escape, nor a single character.
        def char_token_id(sym)
          # Ignore ' on the both sides
          body = sym.id.s_value[1..-2]
          return ESCAPED_CHAR_TOKEN_IDS[body] if ESCAPED_CHAR_TOKEN_IDS.key?(body)

          case body
          when /\A\\([0-7]+)\z/
            # Only octal digits are accepted here, so Integer cannot raise.
            # Escapes such as '\9' fall through to the error below instead of
            # surfacing as a bare ArgumentError.
            Integer(Regexp.last_match(1), 8)
          when /\A(.)\z/
            Regexp.last_match(1).bytes.first
          else
            raise "Unknown Char s_value #{sym}"
          end
        end

        # Yields (symbol, directive) for every symbol that a directive names,
        # either by identifier (compared with the symbol id) or by tag
        # (compared with the symbol tag). Later directives win because the
        # callers simply overwrite the attribute.
        def each_directive_match(directives)
          symbols.each do |sym|
            directives.each do |directive|
              directive.ident_or_tags.each do |ident_or_tag|
                matched =
                  case ident_or_tag
                  when Lrama::Lexer::Token::Ident
                    sym.id == ident_or_tag
                  when Lrama::Lexer::Token::Tag
                    sym.tag == ident_or_tag
                  else
                    raise "Unknown token type. #{directive}"
                  end

                yield sym, directive if matched
              end
            end
          end
        end

        # Numbers that were already assigned when this was first called.
        # Built once and then updated by #assign_number.
        def used_numbers
          return @used_numbers if defined?(@used_numbers)

          @used_numbers = {}
          symbols.map(&:number).each do |n|
            @used_numbers[n] = true
          end
          @used_numbers
        end

        def validate_number_uniqueness!
          invalid = symbols.group_by(&:number).select do |_number, syms|
            syms.count > 1
          end

          return if invalid.empty?

          raise "Symbol number is duplicated. #{invalid}"
        end

        def validate_alias_name_uniqueness!
          invalid = symbols.select(&:alias_name).group_by(&:alias_name).select do |_alias_name, syms|
            syms.count > 1
          end

          return if invalid.empty?

          raise "Symbol alias name is duplicated. #{invalid}"
        end
      end
    end
  end
end

View File

@ -1,4 +1,5 @@
require "strscan"
require "lrama/lexer/grammar_file"
require "lrama/lexer/location"
require "lrama/lexer/token"
@ -26,6 +27,11 @@ module Lrama
%precedence
%prec
%error-token
%before-reduce
%after-reduce
%after-shift-error-token
%after-shift
%after-pop-stack
%empty
%code
%rule

View File

@ -38,7 +38,7 @@ module Lrama
return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos)
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos)
return Lrama::Grammar::Reference.new(type: :dollar, number: Integer(scanner[2]), index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos)
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
@ -51,11 +51,22 @@ module Lrama
when scanner.scan(/@\$/) # @$
return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos)
when scanner.scan(/@(\d+)/) # @1
return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
return Lrama::Grammar::Reference.new(type: :at, number: Integer(scanner[1]), index: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
# $: references
when scanner.scan(/\$:\$/) # $:$
return Lrama::Grammar::Reference.new(type: :index, name: "$", first_column: start, last_column: scanner.pos)
when scanner.scan(/\$:(\d+)/) # $:1
return Lrama::Grammar::Reference.new(type: :index, number: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
when scanner.scan(/\$:([a-zA-Z_][a-zA-Z0-9_]*)/) # $:foo, $:expr (named reference without brackets)
return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos)
when scanner.scan(/\$:\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $:[expr.right], $:[expr-right] (named reference with brackets)
return Lrama::Grammar::Reference.new(type: :index, name: scanner[1], first_column: start, last_column: scanner.pos)
end
end
end

View File

@ -16,8 +16,7 @@ module Lrama
def initialize(
out:, output_file_path:, template_name:, grammar_file_path:,
header_out: nil, header_file_path: nil,
context:, grammar:, error_recovery: false
context:, grammar:, header_out: nil, header_file_path: nil, error_recovery: false
)
@out = out
@output_file_path = output_file_path
@ -162,6 +161,61 @@ module Lrama
STR
end
# Builds the C snippet that invokes the function registered in
# @grammar.after_shift (the template passes "/* %after-shift code. */" as comment).
#
# comment - text emitted as the first line of the snippet (default "").
#
# Returns "" when @grammar.after_shift is not set. Otherwise returns the
# comment, a #line directive pointing at the callback's line in the grammar
# file, a braced call of the registered function with parse_param_name as its
# only argument, and a closing #line directive.
# NOTE(review): [@oline@] / [@ofile@] look like placeholders that are replaced
# with the output line and file elsewhere — confirm.
def after_shift_function(comment = "")
return "" unless @grammar.after_shift
<<-STR
#{comment}
#line #{@grammar.after_shift.line} "#{@grammar_file_path}"
{#{@grammar.after_shift.s_value}(#{parse_param_name});}
#line [@oline@] [@ofile@]
STR
end
end
# Builds the C snippet that invokes the function registered in
# @grammar.before_reduce.
#
# comment - text emitted as the first line of the snippet (default "").
#
# Returns "" when @grammar.before_reduce is not set. Otherwise returns the
# comment, a #line directive pointing at the callback's line in the grammar
# file, a braced call of the registered function, and a closing #line
# directive. The call passes yylen followed directly by user_args.
# NOTE(review): user_args is appended without a separator, so it presumably
# expands to "" or to a comma-prefixed argument list — confirm.
def before_reduce_function(comment = "")
return "" unless @grammar.before_reduce
<<-STR
#{comment}
#line #{@grammar.before_reduce.line} "#{@grammar_file_path}"
{#{@grammar.before_reduce.s_value}(yylen#{user_args});}
#line [@oline@] [@ofile@]
STR
end
end
# Builds the C snippet that invokes the function registered in
# @grammar.after_reduce.
#
# comment - text emitted as the first line of the snippet (default "").
#
# Returns "" when @grammar.after_reduce is not set. Otherwise returns the
# comment, a #line directive pointing at the callback's line in the grammar
# file, a braced call of the registered function, and a closing #line
# directive. The call passes yylen followed directly by user_args.
# NOTE(review): user_args is appended without a separator, so it presumably
# expands to "" or to a comma-prefixed argument list — confirm.
def after_reduce_function(comment = "")
return "" unless @grammar.after_reduce
<<-STR
#{comment}
#line #{@grammar.after_reduce.line} "#{@grammar_file_path}"
{#{@grammar.after_reduce.s_value}(yylen#{user_args});}
#line [@oline@] [@ofile@]
STR
end
end
# Builds the C snippet that invokes the function registered in
# @grammar.after_shift_error_token.
#
# comment - text emitted as the first line of the snippet (default "").
#
# Returns "" when @grammar.after_shift_error_token is not set. Otherwise
# returns the comment, a #line directive pointing at the callback's line in
# the grammar file, a braced call of the registered function with
# parse_param_name as its only argument, and a closing #line directive.
def after_shift_error_token_function(comment = "")
return "" unless @grammar.after_shift_error_token
<<-STR
#{comment}
#line #{@grammar.after_shift_error_token.line} "#{@grammar_file_path}"
{#{@grammar.after_shift_error_token.s_value}(#{parse_param_name});}
#line [@oline@] [@ofile@]
STR
end
end
# Builds the C snippet that invokes the function registered in
# @grammar.after_pop_stack.
#
# len     - interpolated verbatim as the first argument of the call; the
#           template passes the C expression "yylen" in one place and the
#           literal 1 in another.
# comment - text emitted as the first line of the snippet (default "").
#
# Returns "" when @grammar.after_pop_stack is not set. Otherwise returns the
# comment, a #line directive pointing at the callback's line in the grammar
# file, a braced call of the registered function with len followed directly
# by user_args, and a closing #line directive.
# NOTE(review): user_args is appended without a separator, so it presumably
# expands to "" or to a comma-prefixed argument list — confirm.
def after_pop_stack_function(len, comment = "")
return "" unless @grammar.after_pop_stack
<<-STR
#{comment}
#line #{@grammar.after_pop_stack.line} "#{@grammar_file_path}"
{#{@grammar.after_pop_stack.s_value}(#{len}#{user_args});}
#line [@oline@] [@ofile@]
STR
end
end
def symbol_actions_for_error_token
str = ""

File diff suppressed because it is too large Load Diff

View File

@ -29,8 +29,8 @@ module Lrama
end
def non_default_reduces
reduces.select do |reduce|
reduce.rule != @default_reduction_rule
reduces.reject do |reduce|
reduce.rule == @default_reduction_rule
end
end
@ -105,8 +105,8 @@ module Lrama
end
def selected_term_transitions
term_transitions.select do |shift, next_state|
!shift.not_selected
term_transitions.reject do |shift, next_state|
shift.not_selected
end
end

View File

@ -1,8 +1,14 @@
# TODO: Validate position is not over rule rhs
require "forwardable"
module Lrama
class States
class Item < Struct.new(:rule, :position, keyword_init: true)
extend Forwardable
def_delegators "rule", :lhs, :rhs
# Optimization for States#setup_state
def hash
[rule_id, position].hash
@ -20,14 +26,6 @@ module Lrama
rhs.count - position
end
def lhs
rule.lhs
end
def rhs
rule.rhs
end
def next_sym
rhs[position]
end

View File

@ -109,8 +109,8 @@ module Lrama
io << "\n"
# Report shifts
tmp = state.term_transitions.select do |shift, _|
!shift.not_selected
tmp = state.term_transitions.reject do |shift, _|
shift.not_selected
end.map do |shift, next_state|
[shift.next_sym, next_state.id]
end

View File

@ -1,3 +1,3 @@
module Lrama
VERSION = "0.6.2".freeze
VERSION = "0.6.3".freeze
end

View File

@ -1752,6 +1752,7 @@ yybackup:
*++yyvsp = yylval;
YY_IGNORE_MAYBE_UNINITIALIZED_END
*++yylsp = yylloc;
<%= output.after_shift_function("/* %after-shift code. */") %>
/* Discard the shifted token. */
yychar = YYEMPTY;
@ -1784,6 +1785,7 @@ yyreduce:
unconditionally makes the parser a bit smaller, and it avoids a
GCC warning that YYVAL may be used uninitialized. */
yyval = yyvsp[1-yylen];
<%= output.before_reduce_function("/* %before-reduce function. */") %>
/* Default location. */
YYLLOC_DEFAULT (yyloc, (yylsp - yylen), yylen);
@ -1809,6 +1811,7 @@ yyreduce:
YY_SYMBOL_PRINT ("-> $$ =", YY_CAST (yysymbol_kind_t, yyr1[yyn]), &yyval, &yyloc<%= output.user_args %>);
YYPOPSTACK (yylen);
<%= output.after_reduce_function("/* %after-reduce function. */") %>
yylen = 0;
*++yyvsp = yyval;
@ -1910,6 +1913,7 @@ yyerrorlab:
/* Do not reclaim the symbols of the rule whose action triggered
this YYERROR. */
YYPOPSTACK (yylen);
<%= output.after_pop_stack_function("yylen", "/* %after-pop-stack function. */") %>
yylen = 0;
YY_STACK_PRINT (yyss, yyssp<%= output.user_args %>);
yystate = *yyssp;
@ -1969,6 +1973,7 @@ yyerrlab1:
yydestruct ("Error: popping",
YY_ACCESSING_SYMBOL (yystate), yyvsp, yylsp<%= output.user_args %>);
YYPOPSTACK (1);
<%= output.after_pop_stack_function(1, "/* %after-pop-stack function. */") %>
yystate = *yyssp;
YY_STACK_PRINT (yyss, yyssp<%= output.user_args %>);
}
@ -1983,6 +1988,7 @@ yyerrlab1:
/* Shift the error token. */
YY_SYMBOL_PRINT ("Shifting", YY_ACCESSING_SYMBOL (yyn), yyvsp, yylsp<%= output.user_args %>);
<%= output.after_shift_error_token_function("/* %after-shift-error-token code. */") %>
yystate = yyn;
goto yynewstate;