Lrama v0.6.0

This commit is contained in:
yui-knk 2023-12-25 18:40:50 +09:00 committed by Yuichiro Kaneko
parent 9cf1c2bb0c
commit 12b69bf515
20 changed files with 1000 additions and 612 deletions

View File

@ -1,5 +1,25 @@
# NEWS for Lrama
## Lrama 0.6.0 (2023-12-25)
### User defined parameterizing rules
Allow users to define parameterizing rules with the `%rule` directive.
```
%rule pair(X, Y): X Y { $$ = $1 + $2; }
;
%%
program: stmt
;
stmt: pair(ODD, EVEN) <num>
| pair(EVEN, ODD) <num>
;
```
## Lrama 0.5.11 (2023-12-02)
### Type specification of parameterizing rules

View File

@ -8,6 +8,10 @@ require "lrama/grammar/printer"
require "lrama/grammar/reference"
require "lrama/grammar/rule"
require "lrama/grammar/rule_builder"
require "lrama/grammar/parameterizing_rule_builder"
require "lrama/grammar/parameterizing_rule_resolver"
require "lrama/grammar/parameterizing_rule_rhs_builder"
require "lrama/grammar/parameterizing_rule"
require "lrama/grammar/symbol"
require "lrama/grammar/type"
require "lrama/grammar/union"
@ -36,6 +40,7 @@ module Lrama
@rule_builders = []
@rules = []
@sym_to_rules = {}
@parameterizing_resolver = ParameterizingRuleResolver.new
@empty_symbol = nil
@eof_symbol = nil
@error_symbol = nil
@ -69,7 +74,7 @@ module Lrama
return sym
end
if sym = @symbols.find {|s| s.id == id }
if (sym = @symbols.find {|s| s.id == id })
return sym
end
@ -129,6 +134,10 @@ module Lrama
@rule_builders << builder
end
def add_parameterizing_rule_builder(builder)
@parameterizing_resolver.add_parameterizing_rule_builder(builder)
end
def prologue_first_lineno=(prologue_first_lineno)
@aux.prologue_first_lineno = prologue_first_lineno
end
@ -310,7 +319,7 @@ module Lrama
def setup_rules
@rule_builders.each do |builder|
builder.setup_rules
builder.setup_rules(@parameterizing_resolver)
end
end
@ -350,56 +359,21 @@ module Lrama
@accept_symbol = term
end
# 1. Add $accept rule to the top of rules
# 2. Extract action in the middle of RHS into new Empty rule
# 3. Append id and extract action then create Rule
#
# Bison 3.8.2 uses different orders for symbol number and rule number
# when a rule has actions in the middle of a rule.
#
# For example,
#
# `program: $@1 top_compstmt`
#
# Rules are ordered like below,
#
# 1 $@1: ε
# 2 program: $@1 top_compstmt
#
# Symbols are ordered like below,
#
# 164 program
# 165 $@1
#
def normalize_rules
# 1. Add $accept rule to the top of rules
accept = @accept_symbol
eof = @eof_symbol
# Add $accept rule to the top of rules
lineno = @rule_builders.first ? @rule_builders.first.line : 0
@rules << Rule.new(id: @rule_counter.increment, _lhs: accept.id, _rhs: [@rule_builders.first.lhs, eof.id], token_code: nil, lineno: lineno)
@rules << Rule.new(id: @rule_counter.increment, _lhs: @accept_symbol.id, _rhs: [@rule_builders.first.lhs, @eof_symbol.id], token_code: nil, lineno: lineno)
setup_rules
@rule_builders.each do |builder|
# Extract actions in the middle of RHS into new rules.
builder.midrule_action_rules.each do |rule|
@rules << rule
end
builder.rules.each do |rule|
add_nterm(id: rule._lhs)
@rules << rule
end
builder.parameterizing_rules.each do |rule|
add_nterm(id: rule._lhs, tag: rule.lhs_tag)
@rules << rule
end
builder.midrule_action_rules.each do |rule|
add_nterm(id: rule._lhs)
end
end
@rules.sort_by!(&:id)
end
# Collect symbols from rules

View File

@ -1,12 +1,29 @@
require "forwardable"
require "lrama/grammar/code/initial_action_code"
require "lrama/grammar/code/no_reference_code"
require "lrama/grammar/code/printer_code"
require "lrama/grammar/code/rule_action"
module Lrama
class Grammar
class Code < Struct.new(:type, :token_code, keyword_init: true)
class Code
extend Forwardable
def_delegators "token_code", :s_value, :line, :column, :references
attr_reader :type, :token_code
def initialize(type:, token_code:)
@type = type
@token_code = token_code
end
def ==(other)
self.class == other.class &&
self.type == other.type &&
self.token_code == other.token_code
end
# $$, $n, @$, @n are translated to C code
def translated_code
t_code = s_value.dup
@ -17,7 +34,7 @@ module Lrama
str = reference_to_c(ref)
t_code[first_column..last_column] = str
t_code[first_column...last_column] = str
end
return t_code
@ -31,8 +48,3 @@ module Lrama
end
end
end
require "lrama/grammar/code/initial_action_code"
require "lrama/grammar/code/no_reference_code"
require "lrama/grammar/code/printer_code"
require "lrama/grammar/code/rule_action"

View File

@ -2,7 +2,7 @@ module Lrama
class Grammar
class Code
class PrinterCode < Code
def initialize(type: nil, token_code: nil, tag: nil)
def initialize(type:, token_code:, tag:)
super(type: type, token_code: token_code)
@tag = tag
end

View File

@ -2,7 +2,7 @@ module Lrama
class Grammar
class Code
class RuleAction < Code
def initialize(type: nil, token_code: nil, rule: nil)
def initialize(type:, token_code:, rule:)
super(type: type, token_code: token_code)
@rule = rule
end

View File

@ -0,0 +1,6 @@
module Lrama
  class Grammar
    # Value object pairing the concrete rules produced by expanding a
    # parameterizing rule (`rules`) with the LHS token that identifies
    # the expansion (`token`).
    class ParameterizingRule < Struct.new(:rules, :token, keyword_init: true)
    end
  end
end

View File

@ -0,0 +1,34 @@
module Lrama
  class Grammar
    # Expands one user defined parameterizing rule (declared with the
    # `%rule` directive, e.g. `%rule pair(X, Y): X Y`) into concrete rules
    # for a given instantiation site.
    class ParameterizingRuleBuilder
      attr_reader :name, :parameters, :rhs

      # name:       rule name as a String (e.g. "pair")
      # parameters: formal parameter tokens from the `%rule` declaration
      # rhs:        RHS builders, one per alternative of the rule
      def initialize(name, parameters, rhs)
        @name = name
        @parameters = parameters
        @rhs = rhs
        @required_parameters_count = parameters.count
      end

      # Builds the concrete rules for the instantiation described by
      # `token` and `actual_args`. Raises when the number of actual
      # arguments differs from the declared parameter count.
      def build_rules(token, actual_args, rule_counter, lhs_tag, line, rule_builders)
        validate_argument_number!(token)
        resolved_lhs = lhs(actual_args)
        built = @rhs.map do |rhs_builder|
          rhs_builder.build_rules(token, actual_args, parameters, rule_counter, resolved_lhs, lhs_tag, line, rule_builders)
        end
        built.flatten
      end

      private

      # Guard: arity of the instantiation must match the declaration.
      def validate_argument_number!(token)
        actual = token.args.count
        return if actual == @required_parameters_count

        raise "Invalid number of arguments. expect: #{@required_parameters_count} actual: #{actual}"
      end

      # Mangled LHS name for this instantiation,
      # e.g. `pair(ODD, EVEN)` => `pair_ODD_EVEN`.
      def lhs(actual_args)
        suffix = actual_args.map(&:s_value).join('_')
        Lrama::Lexer::Token::Ident.new(s_value: "#{name}_#{suffix}")
      end
    end
  end
end

View File

@ -0,0 +1,30 @@
module Lrama
  class Grammar
    # Registry for user defined parameterizing rules (`%rule`). Stores one
    # builder per declaration and resolves instantiations by rule name.
    class ParameterizingRuleResolver
      def initialize
        @parameterizing_rule_builders = []
      end

      # Registers the builder for one `%rule` declaration.
      def add_parameterizing_rule_builder(builder)
        @parameterizing_rule_builders << builder
      end

      # True when at least one rule with this name has been registered.
      def defined?(name)
        @parameterizing_rule_builders.any? { |b| b.name == name }
      end

      # Expands the instantiation `token` using the most recently
      # registered builder with a matching name. Raises for unknown names.
      def build_rules(token, rule_counter, lhs_tag, line)
        name = token.s_value
        builder = @parameterizing_rule_builders.reverse_each.find { |b| b.name == name }
        raise "Unknown parameterizing rule #{name} at line #{token.line}" unless builder

        builder.build_rules(token, token.args, rule_counter, lhs_tag, line, @parameterizing_rule_builders)
      end
    end
  end
end

View File

@ -0,0 +1,53 @@
module Lrama
  class Grammar
    # Builds the rules for a single RHS alternative of a user defined
    # parameterizing rule (`%rule`). Nested instantiations inside the RHS
    # (e.g. `list(pair(X, Y))`) are expanded recursively before the rule
    # for this alternative itself is created.
    class ParameterizingRuleRhsBuilder
      attr_accessor :symbols, :user_code, :precedence_sym

      def initialize
        @symbols = []         # tokens making up this RHS alternative
        @user_code = nil      # action code attached to this alternative, if any
        @precedence_sym = nil # %prec symbol for this alternative, if any
      end

      # Expands this alternative for the instantiation described by
      # `token`/`actual_args`. Returns a ParameterizingRule whose `rules`
      # are the nested rules followed by the rule for this alternative,
      # and whose `token` is the resolved LHS.
      def build_rules(token, actual_args, parameters, rule_counter, lhs, lhs_tag, line, rule_builders)
        nested_rules = build_nested_rules(token, actual_args, parameters, rule_counter, lhs_tag, line, rule_builders)
        rule = Rule.new(id: rule_counter.increment, _lhs: lhs, _rhs: rhs(token, actual_args, parameters, nested_rules.last), lhs_tag: lhs_tag, token_code: user_code, precedence_sym: precedence_sym, lineno: line)
        ParameterizingRule.new(rules: nested_rules.map(&:rules) + [rule], token: lhs)
      end

      private

      # Recursively builds rules for each InstantiateRule token in this
      # RHS; other symbols yield nil and are dropped by `compact`.
      # NOTE(review): only `nested_rules.last` is consulted when rendering
      # the RHS in #rhs — presumably at most one nested instantiation per
      # alternative is expected; confirm with callers.
      def build_nested_rules(token, actual_args, parameters, rule_counter, lhs_tag, line, rule_builders)
        symbols.each_with_index.map do |sym, i|
          next unless sym.is_a?(Lexer::Token::InstantiateRule)
          # Later registrations win, matching the resolver's lookup.
          builder = rule_builders.select { |builder| builder.name == sym.s_value }.last
          raise "Unknown parameterizing rule #{token.s_value} at line #{token.line}" unless builder
          builder.build_rules(sym, nested_actual_args(actual_args, parameters, i), rule_counter, lhs_tag, line, rule_builders)
        end.flatten.compact
      end

      # Arguments for the nested instantiation at symbols[idx]: each formal
      # parameter reference is substituted with the corresponding actual
      # argument; anything else is passed through unchanged.
      def nested_actual_args(actual_args, parameters, idx)
        symbols[idx].args.map do |arg|
          i = parameters.index { |parameter| parameter.s_value == arg.s_value }
          i.nil? ? arg : actual_args[i]
        end
      end

      # Renders the RHS token list for this instantiation: nested
      # instantiations are replaced by the nested rule's LHS token, and
      # formal parameters by the actual arguments.
      def rhs(token, actual_args, parameters, nested_rule)
        symbols.map do |sym|
          if sym.is_a?(Lexer::Token::InstantiateRule)
            sym.args.map do |arg|
              idx = parameters.index { |parameter| parameter.s_value == arg.s_value }
              idx.nil? ? sym : nested_rule&.token
            end
          else
            idx = parameters.index { |parameter| parameter.s_value == sym.s_value }
            idx.nil? ? sym : actual_args[idx]
          end
        end.flatten
      end
    end
  end
end

View File

@ -51,22 +51,14 @@ module Lrama
freeze_rhs
end
def setup_rules
def setup_rules(parameterizing_resolver)
preprocess_references unless @skip_preprocess_references
process_rhs
process_rhs(parameterizing_resolver)
build_rules
end
def parameterizing_rules
@parameterizing_rules
end
def midrule_action_rules
@midrule_action_rules
end
def rules
@rules
@parameterizing_rules + @midrule_action_rules + @rules
end
private
@ -95,9 +87,9 @@ module Lrama
end
end
# rhs is a mixture of variety type of tokens like `Ident`, `Parameterizing`, `UserCode` and so on.
# rhs is a mixture of various types of tokens, such as `Ident`, `InstantiateRule` and `UserCode`.
# `#process_rhs` replaces some kind of tokens to `Ident` so that all `@replaced_rhs` are `Ident` or `Char`.
def process_rhs
def process_rhs(parameterizing_resolver)
return if @replaced_rhs
@replaced_rhs = []
@ -109,12 +101,17 @@ module Lrama
@replaced_rhs << token
when Lrama::Lexer::Token::Ident
@replaced_rhs << token
when Lrama::Lexer::Token::Parameterizing
parameterizing = ParameterizingRules::Builder.new(token, @rule_counter, @lhs_tag, user_code, precedence_sym, line)
parameterizing.build.each do |r|
@parameterizing_rules << r
when Lrama::Lexer::Token::InstantiateRule
if parameterizing_resolver.defined?(token.rule_name)
parameterizing = parameterizing_resolver.build_rules(token, @rule_counter, @lhs_tag, line)
@parameterizing_rules = @parameterizing_rules + parameterizing.map(&:rules).flatten
@replaced_rhs = @replaced_rhs + parameterizing.map(&:token).flatten.uniq
else
# TODO: Delete this once the standard library is defined as a grammar file.
parameterizing = ParameterizingRules::Builder.new(token, @rule_counter, @lhs_tag, user_code, precedence_sym, line)
@parameterizing_rules = @parameterizing_rules + parameterizing.build
@replaced_rhs << parameterizing.build_token
end
@replaced_rhs << parameterizing.build_token
when Lrama::Lexer::Token::UserCode
prefix = token.referred ? "@" : "$@"
new_token = Lrama::Lexer::Token::Ident.new(s_value: prefix + @midrule_action_counter.increment.to_s)
@ -124,7 +121,7 @@ module Lrama
rule_builder.lhs = new_token
rule_builder.user_code = token
rule_builder.complete_input
rule_builder.setup_rules
rule_builder.setup_rules(parameterizing_resolver)
@rule_builders_for_derived_rules << rule_builder
else
@ -146,8 +143,15 @@ module Lrama
else
candidates = rhs.each_with_index.select {|token, i| token.referred_by?(ref_name) }
raise "Referring symbol `#{ref_name}` is duplicated. #{token}" if candidates.size >= 2
raise "Referring symbol `#{ref_name}` is not found. #{token}" unless referring_symbol = candidates.first
if candidates.size >= 2
location = token.location.partial_location(ref.first_column, ref.last_column)
raise location.generate_error_message("Referring symbol `#{ref_name}` is duplicated.")
end
unless (referring_symbol = candidates.first)
location = token.location.partial_location(ref.first_column, ref.last_column)
raise location.generate_error_message("Referring symbol `#{ref_name}` is not found.")
end
ref.index = referring_symbol[1] + 1
end
@ -167,7 +171,7 @@ module Lrama
end
def flush_user_code
if c = @user_code
if (c = @user_code)
@rhs << c
@user_code = nil
end

View File

@ -1,4 +1,5 @@
require "strscan"
require "lrama/lexer/grammar_file"
require "lrama/lexer/location"
require "lrama/lexer/token"
@ -28,10 +29,12 @@ module Lrama
%error-token
%empty
%code
%rule
)
def initialize(text)
@scanner = StringScanner.new(text)
def initialize(grammar_file)
@grammar_file = grammar_file
@scanner = StringScanner.new(grammar_file.text)
@head_column = @head = @scanner.pos
@head_line = @line = 1
@status = :initial
@ -57,8 +60,9 @@ module Lrama
def location
Location.new(
grammar_file: @grammar_file,
first_line: @head_line, first_column: @head_column,
last_line: @line, last_column: column
last_line: line, last_column: column
)
end
@ -78,8 +82,7 @@ module Lrama
end
end
@head_line = line
@head_column = column
reset_first_position
case
when @scanner.eos?
@ -117,6 +120,8 @@ module Lrama
def lex_c_code
nested = 0
code = ''
reset_first_position
while !@scanner.eos? do
case
when @scanner.scan(/{/)
@ -140,12 +145,12 @@ module Lrama
@line += @scanner.matched.count("\n")
when @scanner.scan(/'.*?'/)
code += %Q(#{@scanner.matched})
when @scanner.scan(/[^\"'\{\}\n]+/)
code += @scanner.matched
when @scanner.scan(/#{Regexp.escape(@end_symbol)}/)
code += @scanner.matched
else
if @scanner.scan(/[^\"'\{\}\n#{@end_symbol}]+/)
code += @scanner.matched
else
code += @scanner.getch
end
code += @scanner.getch
end
end
raise ParseError, "Unexpected code: #{code}."
@ -166,9 +171,14 @@ module Lrama
end
end
def reset_first_position
@head_line = line
@head_column = column
end
def newline
@line += 1
@head = @scanner.pos + 1
@head = @scanner.pos
end
end
end

View File

@ -0,0 +1,21 @@
module Lrama
  class Lexer
    # Holder for a grammar file's path and its raw text, used by the lexer
    # and by error reporting.
    class GrammarFile
      attr_reader :path, :text

      def initialize(path, text)
        @path = path
        @text = text
      end

      # Two grammar files are equal when they are the same class and point
      # at the same path (the text is not compared).
      def ==(other)
        other.class == self.class && other.path == path
      end

      # The text split into lines, memoized on first access.
      def lines
        @lines ||= @text.split("\n")
      end
    end
  end
end

View File

@ -1,9 +1,10 @@
module Lrama
class Lexer
class Location
attr_reader :first_line, :first_column, :last_line, :last_column
attr_reader :grammar_file, :first_line, :first_column, :last_line, :last_column
def initialize(first_line:, first_column:, last_line:, last_column:)
def initialize(grammar_file:, first_line:, first_column:, last_line:, last_column:)
@grammar_file = grammar_file
@first_line = first_line
@first_column = first_column
@last_line = last_line
@ -12,11 +13,85 @@ module Lrama
def ==(other)
self.class == other.class &&
self.grammar_file == other.grammar_file &&
self.first_line == other.first_line &&
self.first_column == other.first_column &&
self.last_line == other.last_line &&
self.last_column == other.last_column
end
def partial_location(left, right)
offset = -first_column
new_first_line = -1
new_first_column = -1
new_last_line = -1
new_last_column = -1
_text.each.with_index do |line, index|
new_offset = offset + line.length + 1
if offset <= left && left <= new_offset
new_first_line = first_line + index
new_first_column = left - offset
end
if offset <= right && right <= new_offset
new_last_line = first_line + index
new_last_column = right - offset
end
offset = new_offset
end
Location.new(
grammar_file: grammar_file,
first_line: new_first_line, first_column: new_first_column,
last_line: new_last_line, last_column: new_last_column
)
end
def to_s
"#{path} (#{first_line},#{first_column})-(#{last_line},#{last_column})"
end
def generate_error_message(error_message)
<<~ERROR.chomp
#{path}:#{first_line}:#{first_column}: #{error_message}
#{line_with_carets}
ERROR
end
def line_with_carets
<<~TEXT
#{text}
#{carets}
TEXT
end
private
def path
grammar_file.path
end
def blanks
(text[0...first_column] or raise "#{first_column} is invalid").gsub(/[^\t]/, ' ')
end
def carets
blanks + '^' * (last_column - first_column)
end
def text
@text ||= _text.join("\n")
end
def _text
@_text ||=begin
range = (first_line - 1)...last_line
grammar_file.lines[range] or raise "#{range} is invalid"
end
end
end
end
end

View File

@ -1,6 +1,6 @@
require 'lrama/lexer/token/char'
require 'lrama/lexer/token/ident'
require 'lrama/lexer/token/parameterizing'
require 'lrama/lexer/token/instantiate_rule'
require 'lrama/lexer/token/tag'
require 'lrama/lexer/token/user_code'

View File

@ -0,0 +1,18 @@
module Lrama
  class Lexer
    class Token
      # Token representing the instantiation of a parameterizing rule in a
      # grammar, e.g. `pair(ODD, EVEN)`.
      class InstantiateRule < Token
        # Actual argument tokens passed at the instantiation site.
        attr_accessor :args

        # s_value:    name of the rule being instantiated (e.g. "pair")
        # alias_name: optional alias, forwarded to Token
        # location:   source location, forwarded to Token
        # args:       actual argument tokens (defaults to none)
        def initialize(s_value:, alias_name: nil, location: nil, args: [])
          super s_value: s_value, alias_name: alias_name, location: location
          @args = args
        end

        # The rule name is the token's own string value.
        def rule_name
          s_value
        end
      end
    end
  end
end

View File

@ -1,34 +0,0 @@
module Lrama
  class Lexer
    class Token
      # Token for the built-in parameterizing rules, recognized either by
      # long name or shorthand (e.g. `option`/`?`, `list`/`*`).
      class Parameterizing < Token
        # Actual argument tokens passed to the parameterizing rule.
        attr_accessor :args

        def initialize(s_value:, alias_name: nil, location: nil, args: [])
          super s_value: s_value, alias_name: alias_name, location: location
          @args = args
        end

        # True when the token is `option` or `?`.
        def option?
          %w(option ?).include?(self.s_value)
        end

        # True when the token is `nonempty_list` or `+`.
        def nonempty_list?
          %w(nonempty_list +).include?(self.s_value)
        end

        # True when the token is `list` or `*`.
        def list?
          %w(list *).include?(self.s_value)
        end

        # True when the token is `separated_nonempty_list`.
        def separated_nonempty_list?
          %w(separated_nonempty_list).include?(self.s_value)
        end

        # True when the token is `separated_list`.
        def separated_list?
          %w(separated_list).include?(self.s_value)
        end
      end
    end
  end
end

View File

@ -35,27 +35,27 @@ module Lrama
# An identifier needs to be wrapped with brackets to use ".-" in it
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\$/) # $$, $<long>$
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
return Lrama::Grammar::Reference.new(type: :dollar, name: "$", ex_tag: tag, first_column: start, last_column: scanner.pos)
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?(\d+)/) # $1, $2, $<long>1
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
return Lrama::Grammar::Reference.new(type: :dollar, index: Integer(scanner[2]), ex_tag: tag, first_column: start, last_column: scanner.pos)
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?([a-zA-Z_][a-zA-Z0-9_]*)/) # $foo, $expr, $<long>program (named reference without brackets)
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $expr.right, $expr-right, $<long>program (named reference with brackets)
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
when scanner.scan(/\$(<[a-zA-Z0-9_]+>)?\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # $[expr.right], $[expr-right], $<long>[expr.right] (named reference with brackets)
tag = scanner[1] ? Lrama::Lexer::Token::Tag.new(s_value: scanner[1]) : nil
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos - 1)
return Lrama::Grammar::Reference.new(type: :dollar, name: scanner[2], ex_tag: tag, first_column: start, last_column: scanner.pos)
# @ references
# An identifier needs to be wrapped with brackets to use ".-" in it
when scanner.scan(/@\$/) # @$
return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos - 1)
return Lrama::Grammar::Reference.new(type: :at, name: "$", first_column: start, last_column: scanner.pos)
when scanner.scan(/@(\d+)/) # @1
return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos - 1)
return Lrama::Grammar::Reference.new(type: :at, index: Integer(scanner[1]), first_column: start, last_column: scanner.pos)
when scanner.scan(/@([a-zA-Z][a-zA-Z0-9_]*)/) # @foo, @expr (named reference without brackets)
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos - 1)
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @expr.right, @expr-right (named reference with brackets)
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos - 1)
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
when scanner.scan(/@\[([a-zA-Z_.][-a-zA-Z0-9_.]*)\]/) # @[expr.right], @[expr-right] (named reference with brackets)
return Lrama::Grammar::Reference.new(type: :at, name: scanner[1], first_column: start, last_column: scanner.pos)
end
end
end

File diff suppressed because it is too large Load Diff

View File

@ -156,7 +156,7 @@ module Lrama
nl = true
end
if r = state.default_reduction_rule
if (r = state.default_reduction_rule)
nl = true
s = "$default".ljust(max_len)

View File

@ -1,3 +1,3 @@
module Lrama
VERSION = "0.5.12".freeze
VERSION = "0.6.0".freeze
end