Lrama v0.5.3

This commit is contained in:
yui-knk 2023-08-04 19:16:38 +09:00 committed by Yuichiro Kaneko
parent e80752f9bb
commit f07ef1d54c
Notes: git 2023-08-05 01:33:01 +00:00
29 changed files with 773 additions and 560 deletions

View File

@ -5,22 +5,7 @@ mentioned below.
## GNU General Public License version 3
These files are licensed under the GNU General Public License version 3. See these files for more information.
These files are licensed under the GNU General Public License version 3 or later. See these files for more information.
* template/bison/yacc.c
* template/bison/yacc.h
## Same with Ruby
These files are licensed same with Ruby. See https://github.com/ruby/ruby/blob/master/COPYING for more information.
* spec/fixtures/integration/ruby_3_0_5/parse.tmp.y
* spec/fixtures/integration/ruby_3_0_5/y.tab.c
* spec/fixtures/integration/ruby_3_0_5/y.tab.h
* spec/fixtures/integration/ruby_3_1_0/parse.tmp.y
* spec/fixtures/integration/ruby_3_1_0/y.tab.c
* spec/fixtures/integration/ruby_3_1_0/y.tab.h
* spec/fixtures/integration/ruby_3_2_0/parse.tmp.y
* spec/fixtures/integration/ruby_3_2_0/y.tab.c
* spec/fixtures/integration/ruby_3_2_0/y.tab.h

14
tool/lrama/exe/lex_state Executable file
View File

@ -0,0 +1,14 @@
#!/usr/bin/env ruby
# Builds the parser states for the grammar file named by the first
# command-line argument and then runs Lrama::LexState#compute on them.

$LOAD_PATH << File.join(__dir__, "../lib")
require "lrama"

grammar_file = ARGV.shift
# Without this guard a missing argument reaches File.read(nil) and fails with
# an opaque TypeError; use the same message as Lrama::Command instead.
abort "File should be specified\n" unless grammar_file

y = File.read(grammar_file)
warning = Lrama::Warning.new
grammar = Lrama::Parser.new(y).parse
states = Lrama::States.new(grammar, warning)
states.compute
Lrama::LexState.new(states).compute

View File

@ -4,4 +4,4 @@
$LOAD_PATH << File.join(__dir__, "../lib")
require "lrama"
Lrama::Command.new.run(ARGV.dup)
Lrama::Command.new(ARGV.dup).run

View File

@ -2,94 +2,57 @@ require 'optparse'
module Lrama
class Command
def run(argv)
opt = OptionParser.new
def initialize(argv)
@argv = argv
# opt.on('-h') {|v| p v }
opt.on('-V', '--version') {|v| puts Lrama::VERSION ; exit 0 }
@version = nil
@skeleton = "bison/yacc.c"
@header = false
@header_file = nil
@report = []
@report_file = nil
@outfile = "y.tab.c"
@trace = []
@error_recovery = false
@grammar_file = nil
@report_file = nil
@trace_opts = nil
@report_opts = nil
end
# Tuning the Parser
skeleton = "bison/yacc.c"
def run
parse_option
opt.on('-S', '--skeleton=FILE') {|v| skeleton = v }
opt.on('-t') { } # Do nothing
# Output Files:
header = false
header_file = nil
report = []
report_file = nil
outfile = "y.tab.c"
opt.on('-h', '--header=[FILE]') {|v| header = true; header_file = v }
opt.on('-d') { header = true }
opt.on('-r', '--report=THINGS') {|v| report = v.split(',') }
opt.on('--report-file=FILE') {|v| report_file = v }
opt.on('-v') { } # Do nothing
opt.on('-o', '--output=FILE') {|v| outfile = v }
# Hidden
trace = []
opt.on('--trace=THINGS') {|v| trace = v.split(',') }
# Error Recovery
error_recovery = false
opt.on('-e') {|v| error_recovery = true }
opt.parse!(argv)
trace_opts = validate_trace(trace)
report_opts = validate_report(report)
grammar_file = argv.shift
if !report.empty? && report_file.nil? && grammar_file
report_file = File.dirname(grammar_file) + "/" + File.basename(grammar_file, ".*") + ".output"
if @version
puts Lrama::VERSION
exit 0
end
if !header_file && header
case
when outfile
header_file = File.dirname(outfile) + "/" + File.basename(outfile, ".*") + ".h"
when grammar_file
header_file = File.dirname(grammar_file) + "/" + File.basename(grammar_file, ".*") + ".h"
end
end
if !grammar_file
abort "File should be specified\n"
end
Report::Duration.enable if trace_opts[:time]
Report::Duration.enable if @trace_opts[:time]
warning = Lrama::Warning.new
if grammar_file == '-'
grammar_file = argv.shift or abort "File name for STDIN should be specified\n"
y = STDIN.read
else
y = File.read(grammar_file)
end
grammar = Lrama::Parser.new(y).parse
states = Lrama::States.new(grammar, warning, trace_state: (trace_opts[:automaton] || trace_opts[:closure]))
grammar = Lrama::Parser.new(@y.read).parse
states = Lrama::States.new(grammar, warning, trace_state: (@trace_opts[:automaton] || @trace_opts[:closure]))
states.compute
context = Lrama::Context.new(states)
if report_file
if @report_file
reporter = Lrama::StatesReporter.new(states)
File.open(report_file, "w+") do |f|
reporter.report(f, **report_opts)
File.open(@report_file, "w+") do |f|
reporter.report(f, **@report_opts)
end
end
File.open(outfile, "w+") do |f|
File.open(@outfile, "w+") do |f|
Lrama::Output.new(
out: f,
output_file_path: outfile,
template_name: skeleton,
grammar_file_path: grammar_file,
header_file_path: header_file,
output_file_path: @outfile,
template_name: @skeleton,
grammar_file_path: @grammar_file,
header_file_path: @header_file,
context: context,
grammar: grammar,
error_recovery: @error_recovery,
).render
end
@ -144,5 +107,61 @@ module Lrama
return h
end
# Parses @argv with OptionParser and stores the results in instance variables.
#
# After the switches are consumed, the next remaining argument is taken as the
# grammar file. "-" means the grammar is read from STDIN, in which case the
# following argument supplies the file name. Aborts when no grammar file name
# is available. Finally fills in the default report and header file paths when
# they were requested but not given explicitly.
def parse_option
  parser = OptionParser.new

  # parser.on('-h') {|v| p v }
  parser.on('-V', '--version') { |_v| @version = true }

  # Tuning the Parser
  parser.on('-S', '--skeleton=FILE') { |file| @skeleton = file }
  parser.on('-t') { } # Do nothing

  # Output Files:
  parser.on('-h', '--header=[FILE]') do |file|
    @header = true
    @header_file = file
  end
  parser.on('-d') { @header = true }
  parser.on('-r', '--report=THINGS') { |things| @report = things.split(',') }
  parser.on('--report-file=FILE') { |file| @report_file = file }
  parser.on('-v') { } # Do nothing
  parser.on('-o', '--output=FILE') { |file| @outfile = file }

  # Hidden
  parser.on('--trace=THINGS') { |things| @trace = things.split(',') }

  # Error Recovery
  parser.on('-e') { |_v| @error_recovery = true }

  parser.parse!(@argv)

  @trace_opts = validate_trace(@trace)
  @report_opts = validate_report(@report)

  @grammar_file = @argv.shift
  abort "File should be specified\n" unless @grammar_file

  if @grammar_file == '-'
    # The grammar comes from STDIN; the next argument is the name to use for it.
    @grammar_file = @argv.shift
    abort "File name for STDIN should be specified\n" unless @grammar_file
    @y = STDIN
  else
    @y = File.open(@grammar_file, 'r')
  end

  # Builds "<dir>/<basename without extension><ext>" from +path+.
  sibling_path = lambda do |path, ext|
    File.dirname(path) + "/" + File.basename(path, ".*") + ext
  end

  if !@report.empty? && @report_file.nil? && @grammar_file
    @report_file = sibling_path.call(@grammar_file, ".output")
  end

  if !@header_file && @header
    # Prefer a header next to the output file, falling back to the grammar file.
    if @outfile
      @header_file = sibling_path.call(@outfile, ".h")
    elsif @grammar_file
      @header_file = sibling_path.call(@grammar_file, ".h")
    end
  end
end
end
end

View File

@ -1,4 +1,4 @@
require "lrama/report"
require "lrama/report/duration"
module Lrama
# This is passed to a template
@ -89,6 +89,16 @@ module Lrama
return a
end
# Builds an array indexed by symbol number whose entries are token ids.
# Every slot starts as the undef symbol's token id; slots belonging to terms
# are then overwritten with that term's own token id.
def yytranslate_inverted
  undef_token_id = @states.undef_symbol.token_id
  table = Array.new(@states.symbols.count) { undef_token_id }

  @states.terms.each_with_object(table) do |term, inverted|
    inverted[term.number] = term.token_id
  end
end
# Mapping from rule number to line number of the rule is defined.
# Dummy rule is appended as the first element whose value is 0
# because 0 means error in yydefact.

View File

@ -1,283 +1,15 @@
require "forwardable"
require "lrama/grammar/code"
require "lrama/grammar/error_token"
require "lrama/grammar/precedence"
require "lrama/grammar/printer"
require "lrama/grammar/reference"
require "lrama/grammar/rule"
require "lrama/grammar/symbol"
require "lrama/grammar/union"
require "lrama/lexer"
module Lrama
Rule = Struct.new(:id, :lhs, :rhs, :code, :nullable, :precedence_sym, :lineno, keyword_init: true) do
# TODO: Change this to display_name
def to_s
l = lhs.id.s_value
r = rhs.empty? ? "ε" : rhs.map {|r| r.id.s_value }.join(", ")
"#{l} -> #{r}"
end
# Used by #user_actions
def as_comment
l = lhs.id.s_value
r = rhs.empty? ? "%empty" : rhs.map {|r| r.display_name }.join(" ")
"#{l}: #{r}"
end
def precedence
precedence_sym && precedence_sym.precedence
end
def initial_rule?
id == 0
end
def translated_code
if code
code.translated_code
else
nil
end
end
end
# Symbol is both of nterm and term
# `number` is both for nterm and term
# `token_id` is tokentype for term, internal sequence number for nterm
#
# TODO: Add validation for ASCII code range for Token::Char
Symbol = Struct.new(:id, :alias_name, :number, :tag, :term, :token_id, :nullable, :precedence, :printer, keyword_init: true) do
attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
def term?
term
end
def nterm?
!term
end
def eof_symbol?
!!@eof_symbol
end
def error_symbol?
!!@error_symbol
end
def undef_symbol?
!!@undef_symbol
end
def accept_symbol?
!!@accept_symbol
end
def display_name
if alias_name
alias_name
else
id.s_value
end
end
# name for yysymbol_kind_t
#
# See: b4_symbol_kind_base
def enum_name
case
when accept_symbol?
name = "YYACCEPT"
when eof_symbol?
name = "YYEOF"
when term? && id.type == Token::Char
if alias_name
name = number.to_s + alias_name
else
name = number.to_s + id.s_value
end
when term? && id.type == Token::Ident
name = id.s_value
when nterm? && (id.s_value.include?("$") || id.s_value.include?("@"))
name = number.to_s + id.s_value
when nterm?
name = id.s_value
else
raise "Unexpected #{self}"
end
"YYSYMBOL_" + name.gsub(/[^a-zA-Z_0-9]+/, "_")
end
# comment for yysymbol_kind_t
def comment
case
when accept_symbol?
# YYSYMBOL_YYACCEPT
id.s_value
when eof_symbol?
# YYEOF
alias_name
when (term? && 0 < token_id && token_id < 128)
# YYSYMBOL_3_backslash_, YYSYMBOL_14_
alias_name || id.s_value
when id.s_value.include?("$") || id.s_value.include?("@")
# YYSYMBOL_21_1
id.s_value
else
# YYSYMBOL_keyword_class, YYSYMBOL_strings_1
alias_name || id.s_value
end
end
end
Type = Struct.new(:id, :tag, keyword_init: true)
Code = Struct.new(:type, :token_code, keyword_init: true) do
extend Forwardable
def_delegators "token_code", :s_value, :line, :column, :references
# $$, $n, @$, @n is translated to C code
def translated_code
case type
when :user_code
translated_user_code
when :initial_action
translated_initial_action_code
end
end
# * ($1) error
# * ($$) *yyvaluep
# * (@1) error
# * (@$) *yylocationp
def translated_printer_code(tag)
t_code = s_value.dup
references.reverse.each do |ref|
first_column = ref.first_column
last_column = ref.last_column
case
when ref.value == "$" && ref.type == :dollar # $$
# Omit "<>"
member = tag.s_value[1..-2]
str = "((*yyvaluep).#{member})"
when ref.value == "$" && ref.type == :at # @$
str = "(*yylocationp)"
when ref.type == :dollar # $n
raise "$#{ref.value} can not be used in %printer."
when ref.type == :at # @n
raise "@#{ref.value} can not be used in %printer."
else
raise "Unexpected. #{self}, #{ref}"
end
t_code[first_column..last_column] = str
end
return t_code
end
private
# * ($1) yyvsp[i]
# * ($$) yyval
# * (@1) yylsp[i]
# * (@$) yyloc
def translated_user_code
t_code = s_value.dup
references.reverse.each do |ref|
first_column = ref.first_column
last_column = ref.last_column
case
when ref.value == "$" && ref.type == :dollar # $$
# Omit "<>"
member = ref.tag.s_value[1..-2]
str = "(yyval.#{member})"
when ref.value == "$" && ref.type == :at # @$
str = "(yyloc)"
when ref.type == :dollar # $n
i = -ref.position_in_rhs + ref.value
# Omit "<>"
member = ref.tag.s_value[1..-2]
str = "(yyvsp[#{i}].#{member})"
when ref.type == :at # @n
i = -ref.position_in_rhs + ref.value
str = "(yylsp[#{i}])"
else
raise "Unexpected. #{self}, #{ref}"
end
t_code[first_column..last_column] = str
end
return t_code
end
# * ($1) error
# * ($$) yylval
# * (@1) error
# * (@$) yylloc
def translated_initial_action_code
t_code = s_value.dup
references.reverse.each do |ref|
first_column = ref.first_column
last_column = ref.last_column
case
when ref.value == "$" && ref.type == :dollar # $$
str = "yylval"
when ref.value == "$" && ref.type == :at # @$
str = "yylloc"
when ref.type == :dollar # $n
raise "$#{ref.value} can not be used in initial_action."
when ref.type == :at # @n
raise "@#{ref.value} can not be used in initial_action."
else
raise "Unexpected. #{self}, #{ref}"
end
t_code[first_column..last_column] = str
end
return t_code
end
end
# type: :dollar or :at
# ex_tag: "$<tag>1" (Optional)
Reference = Struct.new(:type, :value, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true) do
def tag
if ex_tag
ex_tag
else
referring_symbol.tag
end
end
end
Precedence = Struct.new(:type, :precedence, keyword_init: true) do
include Comparable
def <=>(other)
self.precedence <=> other.precedence
end
end
Printer = Struct.new(:ident_or_tags, :code, :lineno, keyword_init: true) do
def translated_code(member)
code.translated_printer_code(member)
end
end
Union = Struct.new(:code, :lineno, keyword_init: true) do
def braces_less_code
# Remove braces
code.s_value[1..-2]
end
end
Token = Lrama::Lexer::Token
# Grammar is the result of parsing an input grammar file
@ -287,7 +19,7 @@ module Lrama
attr_reader :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux
attr_accessor :union, :expect,
:printers,
:printers, :error_tokens,
:lex_param, :parse_param, :initial_action,
:symbols, :types,
:rules, :_rules,
@ -295,6 +27,7 @@ module Lrama
def initialize
@printers = []
@error_tokens = []
@symbols = []
@types = []
@_rules = []
@ -314,6 +47,10 @@ module Lrama
@printers << Printer.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
end
# Registers an %error-token declaration.
#
# ident_or_tags: tokens naming the symbols or tags the declaration applies to
# code:          the code attached to the declaration
# lineno:        line of the declaration in the grammar file
#
# ErrorToken is a Struct created with keyword_init: true, so its members must
# be passed as keywords; positional arguments raise ArgumentError.
def add_error_token(ident_or_tags:, code:, lineno:)
  @error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
end
def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false)
if token_id && (sym = @symbols.find {|s| s.token_id == token_id })
if replace
@ -419,12 +156,14 @@ module Lrama
fill_sym_to_rules
fill_nterm_type
fill_symbol_printer
fill_symbol_error_token
@symbols.sort_by!(&:number)
end
# TODO: More validation methods
def validate!
validate_symbol_number_uniqueness!
validate_no_declared_type_reference!
end
def compute_nullable
@ -845,6 +584,23 @@ module Lrama
end
end
# Attaches each registered error token to the symbols it names.
#
# An Ident entry matches a symbol by id, a Tag entry matches by tag. Any other
# entry type raises. When several error tokens match the same symbol, the one
# visited last is kept.
def fill_symbol_error_token
  @symbols.each do |symbol|
    @error_tokens.each do |candidate|
      candidate.ident_or_tags.each do |target|
        matched =
          case target.type
          when Token::Ident then symbol.id == target
          when Token::Tag then symbol.tag == target
          else raise "Unknown token type. #{candidate}"
          end

        symbol.error_token = candidate if matched
      end
    end
  end
end
def validate_symbol_number_uniqueness!
invalid = @symbols.group_by(&:number).select do |number, syms|
syms.count > 1
@ -854,5 +610,23 @@ module Lrama
raise "Symbol number is duplicated. #{invalid}"
end
# Checks that every `$` reference used in a rule's code has a tag.
#
# Rules without code are ignored. All offending references are collected
# first and then reported together in one RuntimeError, one per line.
# Returns nil when nothing is wrong.
def validate_no_declared_type_reference!
  messages = rules.flat_map do |rule|
    next [] unless rule.code

    rule.code.references
      .select { |ref| ref.type == :dollar && !ref.tag }
      .map { |ref| "$#{ref.value} of '#{rule.lhs.id.s_value}' has no declared type" }
  end

  raise messages.join("\n") unless messages.empty?
end
end
end

View File

@ -0,0 +1,123 @@
require "forwardable"
module Lrama
  class Grammar
    # A piece of code from the grammar file together with its kind (`type`).
    # The text, position and references are read from `token_code`.
    class Code < Struct.new(:type, :token_code, keyword_init: true)
      extend Forwardable

      def_delegators "token_code", :s_value, :line, :column, :references

      # $$, $n, @$, @n is translated to C code
      #
      # Returns nil for any type other than :user_code and :initial_action.
      def translated_code
        return translated_user_code if type == :user_code
        return translated_initial_action_code if type == :initial_action

        nil
      end

      # * ($1) error
      # * ($$) *yyvaluep
      # * (@1) error
      # * (@$) *yylocationp
      #
      # `tag` supplies the union member name; its surrounding "<>" is dropped.
      def translated_printer_code(tag)
        substitute_references do |ref|
          current = ref.value == "$"

          if current && ref.type == :dollar # $$
            # Omit "<>"
            "((*yyvaluep).#{tag.s_value[1..-2]})"
          elsif current && ref.type == :at # @$
            "(*yylocationp)"
          elsif ref.type == :dollar # $n
            raise "$#{ref.value} can not be used in %printer."
          elsif ref.type == :at # @n
            raise "@#{ref.value} can not be used in %printer."
          else
            raise "Unexpected. #{self}, #{ref}"
          end
        end
      end
      alias :translated_error_token_code :translated_printer_code

      private

      # Returns a copy of the code text in which the column range of every
      # reference is replaced by the string the block returns for it.
      # References are processed last to first, so a replacement never shifts
      # the columns of a reference that is still to be handled.
      def substitute_references
        references.reverse.each_with_object(s_value.dup) do |ref, text|
          columns = ref.first_column..ref.last_column
          text[columns] = yield(ref)
        end
      end

      # * ($1) yyvsp[i]
      # * ($$) yyval
      # * (@1) yylsp[i]
      # * (@$) yyloc
      def translated_user_code
        substitute_references do |ref|
          current = ref.value == "$"

          if current && ref.type == :dollar # $$
            # Omit "<>"
            "(yyval.#{ref.tag.s_value[1..-2]})"
          elsif current && ref.type == :at # @$
            "(yyloc)"
          elsif ref.type == :dollar # $n
            offset = -ref.position_in_rhs + ref.value
            # Omit "<>"
            "(yyvsp[#{offset}].#{ref.tag.s_value[1..-2]})"
          elsif ref.type == :at # @n
            offset = -ref.position_in_rhs + ref.value
            "(yylsp[#{offset}])"
          else
            raise "Unexpected. #{self}, #{ref}"
          end
        end
      end

      # * ($1) error
      # * ($$) yylval
      # * (@1) error
      # * (@$) yylloc
      def translated_initial_action_code
        substitute_references do |ref|
          current = ref.value == "$"

          if current && ref.type == :dollar # $$
            "yylval"
          elsif current && ref.type == :at # @$
            "yylloc"
          elsif ref.type == :dollar # $n
            raise "$#{ref.value} can not be used in initial_action."
          elsif ref.type == :at # @n
            raise "@#{ref.value} can not be used in initial_action."
          else
            raise "Unexpected. #{self}, #{ref}"
          end
        end
      end
    end
  end
end

View File

@ -0,0 +1,9 @@
module Lrama
  class Grammar
    # One %error-token declaration: the identifiers/tags it applies to, its
    # code and the line it was declared on.
    class ErrorToken < Struct.new(:ident_or_tags, :code, :lineno, keyword_init: true)
      # Returns the declaration's code translated for the given member, as
      # produced by the code object's #translated_error_token_code.
      def translated_code(member)
        error_token_code = code
        error_token_code.translated_error_token_code(member)
      end
    end
  end
end

View File

@ -0,0 +1,11 @@
module Lrama
  class Grammar
    # A precedence entry. Instances are ordered by their `precedence` value
    # only; `type` takes no part in comparisons.
    class Precedence < Struct.new(:type, :precedence, keyword_init: true)
      include Comparable

      # Compares by the precedence value of both sides.
      def <=>(other)
        precedence <=> other.precedence
      end
    end
  end
end

View File

@ -0,0 +1,9 @@
module Lrama
  class Grammar
    # One %printer declaration: the identifiers/tags it applies to, its code
    # and the line it was declared on.
    class Printer < Struct.new(:ident_or_tags, :code, :lineno, keyword_init: true)
      # Returns the declaration's code translated for the given member, as
      # produced by the code object's #translated_printer_code.
      def translated_code(member)
        printer_code = code
        printer_code.translated_printer_code(member)
      end
    end
  end
end

View File

@ -0,0 +1,22 @@
# type: :dollar or :at
# ex_tag: "$<tag>1" (Optional)
module Lrama
  class Grammar
    # A `$`/`@` reference found in code, with the columns it occupies.
    class Reference < Struct.new(:type, :value, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true)
      # Returns the explicit tag when one was written on the reference,
      # otherwise the tag of the referring symbol, otherwise nil.
      def tag
        return ex_tag if ex_tag

        # FIXME: Remove this class check
        # When the referring symbol is not a Symbol (the
        # Lrama::Lexer::Token (User_code) case) there is no tag to return.
        referring_symbol.tag if referring_symbol.is_a?(Symbol)
      end
    end
  end
end

View File

@ -0,0 +1,33 @@
module Lrama
  class Grammar
    # A grammar rule: a left-hand side symbol, right-hand side symbols and an
    # optional piece of code.
    class Rule < Struct.new(:id, :lhs, :rhs, :code, :nullable, :precedence_sym, :lineno, keyword_init: true)
      # TODO: Change this to display_name
      #
      # Renders the rule as "lhs -> a, b", or "lhs -> ε" when rhs is empty.
      def to_s
        left = lhs.id.s_value
        right =
          if rhs.empty?
            "ε"
          else
            rhs.map { |sym| sym.id.s_value }.join(", ")
          end

        "#{left} -> #{right}"
      end

      # Used by #user_actions
      #
      # Renders the rule as "lhs: a b", or "lhs: %empty" when rhs is empty.
      def as_comment
        left = lhs.id.s_value
        right = "%empty"
        right = rhs.map { |sym| sym.display_name }.join(" ") unless rhs.empty?

        "#{left}: #{right}"
      end

      # Precedence of precedence_sym, or nil when there is no precedence_sym.
      def precedence
        return nil if precedence_sym.nil?

        precedence_sym.precedence
      end

      # True for the rule whose id is 0.
      def initial_rule?
        id == 0
      end

      # Translated code of the rule, or nil when the rule has no code.
      def translated_code
        return nil if code.nil?

        code.translated_code
      end
    end
  end
end

View File

@ -0,0 +1,94 @@
# Symbol is both of nterm and term
# `number` is both for nterm and term
# `token_id` is tokentype for term, internal sequence number for nterm
#
# TODO: Add validation for ASCII code range for Token::Char
module Lrama
  class Grammar
    # A grammar symbol, either a term or a nterm, selected by the `term` member.
    class Symbol < Struct.new(:id, :alias_name, :number, :tag, :term, :token_id, :nullable, :precedence, :printer, :error_token, keyword_init: true)
      attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol

      # Returns the `term` member as is.
      def term?
        term
      end

      def nterm?
        !term
      end

      def eof_symbol?
        !!@eof_symbol
      end

      def error_symbol?
        !!@error_symbol
      end

      def undef_symbol?
        !!@undef_symbol
      end

      def accept_symbol?
        !!@accept_symbol
      end

      # The alias when there is one, otherwise the id's text.
      def display_name
        alias_name || id.s_value
      end

      # name for yysymbol_kind_t
      #
      # See: b4_symbol_kind_base
      #
      # Every run of characters outside [a-zA-Z_0-9] in the chosen base name is
      # replaced by a single "_" and "YYSYMBOL_" is prepended.
      def enum_name
        base =
          if accept_symbol?
            "YYACCEPT"
          elsif eof_symbol?
            "YYEOF"
          elsif term? && id.type == Token::Char
            number.to_s + (alias_name || id.s_value)
          elsif term? && id.type == Token::Ident
            id.s_value
          elsif nterm? && (id.s_value.include?("$") || id.s_value.include?("@"))
            number.to_s + id.s_value
          elsif nterm?
            id.s_value
          else
            raise "Unexpected #{self}"
          end

        "YYSYMBOL_" + base.gsub(/[^a-zA-Z_0-9]+/, "_")
      end

      # comment for yysymbol_kind_t
      def comment
        if accept_symbol?
          # YYSYMBOL_YYACCEPT
          id.s_value
        elsif eof_symbol?
          # YYEOF
          alias_name
        elsif term? && 0 < token_id && token_id < 128
          # YYSYMBOL_3_backslash_, YYSYMBOL_14_
          alias_name || id.s_value
        elsif id.s_value.include?("$") || id.s_value.include?("@")
          # YYSYMBOL_21_1
          id.s_value
        else
          # YYSYMBOL_keyword_class, YYSYMBOL_strings_1
          alias_name || id.s_value
        end
      end
    end
  end
end

View File

@ -0,0 +1,10 @@
module Lrama
  class Grammar
    # A %union declaration: its code and the line it was declared on.
    class Union < Struct.new(:code, :lineno, keyword_init: true)
      # Returns the code text without its first and last character
      # (the surrounding braces).
      def braces_less_code
        # Remove braces
        text = code.s_value
        text[1...-1]
      end
    end
  end
end

View File

@ -0,0 +1,8 @@
require "lrama/irb/load_grammar"
# You need to explicitly require this file to add irb commands
# See: "lib/irb/extend-command.rb" in ruby/irb
# Namespace for Lrama's irb commands. The module body is intentionally empty
# here; command classes are defined in the files required above.
module Lrama
module IRB
end
end

View File

@ -0,0 +1,9 @@
require 'irb/cmd/nop'
module Lrama
  module IRB
    # irb command class for loading a grammar. It currently adds nothing to
    # the Nop command it inherits from.
    #
    # The superclass must be written with a leading "::". Inside
    # `module Lrama; module IRB`, a bare `IRB` resolves to Lrama::IRB, so
    # `IRB::ExtendCommand::Nop` would raise NameError.
    class LoadGrammar < ::IRB::ExtendCommand::Nop
    end
  end
end

View File

@ -0,0 +1,28 @@
require 'json'
module Lrama
  # Reporter for computed states, wrapped in duration reporting.
  # NOTE(review): report_conflicts, report_grammar and report_states are not
  # defined in this class as shown here; confirm where they come from.
  class JsonReporter
    include Lrama::Report::Duration

    def initialize(states)
      @states = states
    end

    # Writes the report to +io+. +options+ are forwarded to #_report and the
    # whole run is timed under the name :report.
    def report(io, **options)
      report_duration(:report) { _report(io, **options) }
    end

    private

    # Emits the conflicts section, the grammar section when +grammar+ is
    # truthy, and then the states section.
    def _report(io, grammar: false, states: false, itemsets: false, lookaheads: false, solved: false, verbose: false)
      # TODO: Unused terms
      # TODO: Unused rules

      report_conflicts(io)
      report_grammar(io) if grammar
      report_states(io, itemsets, lookaheads, solved, verbose)
    end
  end
end

View File

@ -1,84 +1,12 @@
require "strscan"
require "lrama/report"
require "lrama/report/duration"
require "lrama/lexer/token"
module Lrama
# Lexer for parse.y
class Lexer
include Lrama::Report::Duration
# s_value is semantic value
Token = Struct.new(:type, :s_value, :alias, keyword_init: true) do
Type = Struct.new(:id, :name, keyword_init: true)
attr_accessor :line, :column, :referred
# For User_code
attr_accessor :references
def to_s
"#{super} line: #{line}, column: #{column}"
end
def referred_by?(string)
[self.s_value, self.alias].include?(string)
end
def ==(other)
self.class == other.class && self.type == other.type && self.s_value == other.s_value
end
def numberize_references(lhs, rhs)
self.references.map! {|ref|
ref_name = ref[1]
if ref_name.is_a?(String) && ref_name != '$'
value =
if lhs.referred_by?(ref_name)
'$'
else
rhs.find_index {|token| token.referred_by?(ref_name) } + 1
end
[ref[0], value, ref[2], ref[3], ref[4]]
else
ref
end
}
end
@i = 0
@types = []
def self.define_type(name)
type = Type.new(id: @i, name: name.to_s)
const_set(name, type)
@types << type
@i += 1
end
# Token types
define_type(:P_expect) # %expect
define_type(:P_define) # %define
define_type(:P_printer) # %printer
define_type(:P_lex_param) # %lex-param
define_type(:P_parse_param) # %parse-param
define_type(:P_initial_action) # %initial-action
define_type(:P_union) # %union
define_type(:P_token) # %token
define_type(:P_type) # %type
define_type(:P_nonassoc) # %nonassoc
define_type(:P_left) # %left
define_type(:P_right) # %right
define_type(:P_prec) # %prec
define_type(:User_code) # { ... }
define_type(:Tag) # <int>
define_type(:Number) # 0
define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there)
define_type(:Ident) # api.pure, tNUMBER
define_type(:Named_Ref) # [foo]
define_type(:Semicolon) # ;
define_type(:Bar) # |
define_type(:String) # "str"
define_type(:Char) # '+'
end
# States
#
# See: https://www.gnu.org/software/bison/manual/html_node/Grammar-Outline.html
@ -207,6 +135,8 @@ module Lrama
tokens << create_token(Token::P_define, ss[0], line, ss.pos - column)
when ss.scan(/%printer/)
tokens << create_token(Token::P_printer, ss[0], line, ss.pos - column)
when ss.scan(/%error-token/)
tokens << create_token(Token::P_error_token, ss[0], line, ss.pos - column)
when ss.scan(/%lex-param/)
tokens << create_token(Token::P_lex_param, ss[0], line, ss.pos - column)
when ss.scan(/%parse-param/)

View File

@ -0,0 +1,76 @@
module Lrama
  class Lexer
    # A token of the grammar file. `s_value` is the semantic value.
    class Token < Struct.new(:type, :s_value, :alias, keyword_init: true)
      Type = Struct.new(:id, :name, keyword_init: true)

      attr_accessor :line, :column, :referred
      # For User_code
      attr_accessor :references

      def to_s
        "#{super} line: #{line}, column: #{column}"
      end

      # True when +string+ is this token's s_value or its alias.
      def referred_by?(string)
        [self.s_value, self.alias].include?(string)
      end

      # Tokens are equal when class, type and s_value match; alias and
      # position are ignored.
      def ==(other)
        self.class == other.class && self.type == other.type && self.s_value == other.s_value
      end

      # Rewrites named references in place so that they use numbers.
      #
      # Each reference is an array whose second element is the reference name.
      # A String name other than "$" is replaced by "$" when it names +lhs+,
      # or by the 1-based position of the first +rhs+ token it names. Other
      # references are left untouched.
      #
      # Raises RuntimeError when a name matches neither +lhs+ nor any +rhs+
      # token. Previously this surfaced as NoMethodError from `nil + 1`.
      def numberize_references(lhs, rhs)
        self.references.map! {|ref|
          ref_name = ref[1]
          if ref_name.is_a?(::String) && ref_name != '$'
            value =
              if lhs.referred_by?(ref_name)
                '$'
              else
                index = rhs.find_index {|token| token.referred_by?(ref_name) }
                raise "'#{ref_name}' is invalid name." unless index

                index + 1
              end
            [ref[0], value, ref[2], ref[3], ref[4]]
          else
            ref
          end
        }
      end

      @i = 0
      @types = []

      # Creates a Type with the next sequential id, stores it in a constant of
      # the same name on Token and records it in @types.
      def self.define_type(name)
        type = Type.new(id: @i, name: name.to_s)
        const_set(name, type)
        @types << type
        @i += 1
      end

      # Token types
      define_type(:P_expect)         # %expect
      define_type(:P_define)         # %define
      define_type(:P_printer)        # %printer
      define_type(:P_error_token)    # %error-token
      define_type(:P_lex_param)      # %lex-param
      define_type(:P_parse_param)    # %parse-param
      define_type(:P_initial_action) # %initial-action
      define_type(:P_union)          # %union
      define_type(:P_token)          # %token
      define_type(:P_type)           # %type
      define_type(:P_nonassoc)       # %nonassoc
      define_type(:P_left)           # %left
      define_type(:P_right)          # %right
      define_type(:P_prec)           # %prec
      define_type(:User_code)        # { ... }
      define_type(:Tag)              # <int>
      define_type(:Number)           # 0
      define_type(:Ident_Colon)      # k_if:, k_if : (spaces can be there)
      define_type(:Ident)            # api.pure, tNUMBER
      define_type(:Named_Ref)        # [foo]
      define_type(:Semicolon)        # ;
      define_type(:Bar)              # |
      define_type(:String)           # "str"
      define_type(:Char)             # '+'
    end
  end
end

View File

@ -1,20 +1,24 @@
require "erb"
require "forwardable"
require "lrama/report"
require "lrama/report/duration"
module Lrama
class Output
extend Forwardable
include Report::Duration
attr_reader :grammar_file_path, :context, :grammar
attr_reader :grammar_file_path, :context, :grammar, :error_recovery
def_delegators "@context", :yyfinal, :yylast, :yyntokens, :yynnts, :yynrules, :yynstates,
:yymaxutok, :yypact_ninf, :yytable_ninf
def_delegators "@grammar", :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol
def initialize(out:, output_file_path:, template_name:, grammar_file_path:, header_out: nil, header_file_path: nil, context:, grammar:)
def initialize(
out:, output_file_path:, template_name:, grammar_file_path:,
header_out: nil, header_file_path: nil,
context:, grammar:, error_recovery: false
)
@out = out
@output_file_path = output_file_path
@template_name = template_name
@ -23,6 +27,7 @@ module Lrama
@header_file_path = header_file_path
@context = context
@grammar = grammar
@error_recovery = error_recovery
end
if ERB.instance_method(:initialize).parameters.last.first == :key
@ -98,6 +103,10 @@ module Lrama
int_array_to_string(@context.yytranslate)
end
# Returns the context's yytranslate_inverted table formatted by
# int_array_to_string.
def yytranslate_inverted
  inverted_table = @context.yytranslate_inverted
  int_array_to_string(inverted_table)
end
def yyrline
int_array_to_string(@context.yyrline)
end
@ -155,6 +164,25 @@ module Lrama
STR
end
# Builds the C `case` entries for symbols that have an error token.
#
# For each grammar symbol with an error_token, one entry is appended that
# contains the symbol's enum name, its comment, a #line directive pointing at
# the declaration in the grammar file, and the error token's code translated
# for the symbol's tag. Returns the concatenated string ("" when no symbol has
# an error token).
# NOTE(review): the `[@oline@]` / `[@ofile@]` markers are emitted literally
# here; presumably they are substituted later during rendering - confirm.
def symbol_actions_for_error_token
str = ""
@grammar.symbols.each do |sym|
# Symbols without an %error-token declaration produce no entry.
next unless sym.error_token
str << <<-STR
case #{sym.enum_name}: /* #{sym.comment} */
#line #{sym.error_token.lineno} "#{@grammar_file_path}"
#{sym.error_token.translated_code(sym.tag)}
#line [@oline@] [@ofile@]
break;
STR
end
str
end
# b4_user_actions
def user_actions
str = ""

View File

@ -1,4 +1,4 @@
require "lrama/report"
require "lrama/report/duration"
require "lrama/parser/token_scanner"
module Lrama
@ -59,6 +59,13 @@ module Lrama
code = grammar.build_code(:printer, code)
ident_or_tags = ts.consume_multi(T::Ident, T::Tag)
grammar.add_printer(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
when T::P_error_token
lineno = ts.current_token.line
ts.next
code = ts.consume!(T::User_code)
code = grammar.build_code(:printer, code)
ident_or_tags = ts.consume_multi(T::Ident, T::Tag)
grammar.add_error_token(ident_or_tags: ident_or_tags, code: code, lineno: lineno)
when T::P_lex_param
ts.next
code = ts.consume!(T::User_code)

View File

@ -1,47 +1,2 @@
module Lrama
class Report
module Profile
# 1. Wrap target method with Profile.report_profile like below:
#
# Lrama::Report::Profile.report_profile { method }
#
# 2. Run lrama command, for example
#
# $ ./exe/lrama --trace=time spec/fixtures/integration/ruby_3_2_0/parse.tmp.y
#
# 3. Generate html file
#
# $ stackprof --d3-flamegraph tmp/stackprof-cpu-myapp.dump > tmp/flamegraph.html
#
def self.report_profile
require "stackprof"
StackProf.run(mode: :cpu, raw: true, out: 'tmp/stackprof-cpu-myapp.dump') do
yield
end
end
end
module Duration
def self.enable
@_report_duration_enabled = true
end
def self.enabled?
!!@_report_duration_enabled
end
def report_duration(method_name)
time1 = Time.now.to_f
result = yield
time2 = Time.now.to_f
if Duration.enabled?
puts sprintf("%s %10.5f s", method_name, time2 - time1)
end
return result
end
end
end
end
require 'lrama/report/duration'
require 'lrama/report/profile'

View File

@ -0,0 +1,25 @@
module Lrama
  class Report
    # Mixin that times a block and, when reporting has been enabled, prints
    # how long it took.
    module Duration
      # Turns duration reporting on for every includer.
      def self.enable
        @_report_duration_enabled = true
      end

      # True once .enable has been called.
      def self.enabled?
        !!@_report_duration_enabled
      end

      # Runs the given block and returns its result.
      #
      # When reporting is enabled, prints "<method_name> <seconds> s" to
      # standard output. Elapsed time is taken from the monotonic clock, so
      # the figure is not affected by wall-clock adjustments made while the
      # block runs.
      def report_duration(method_name)
        started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
        result = yield
        elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

        puts sprintf("%s %10.5f s", method_name, elapsed) if Duration.enabled?

        result
      end
    end
  end
end

View File

@ -0,0 +1,25 @@
module Lrama
  class Report
    module Profile
      # 1. Wrap target method with Profile.report_profile like below:
      #
      #   Lrama::Report::Profile.report_profile { method }
      #
      # 2. Run lrama command, for example
      #
      #   $ ./exe/lrama --trace=time spec/fixtures/integration/ruby_3_2_0/parse.tmp.y
      #
      # 3. Generate html file
      #
      #   $ stackprof --d3-flamegraph tmp/stackprof-cpu-myapp.dump > tmp/flamegraph.html
      #
      # stackprof is required only when this method is called, and the given
      # block is run under StackProf.run with the options below.
      def self.report_profile
        require "stackprof"

        profile_options = { mode: :cpu, raw: true, out: 'tmp/stackprof-cpu-myapp.dump' }
        StackProf.run(**profile_options) { yield }
      end
    end
  end
end

View File

@ -1,34 +1,9 @@
require "lrama/state/reduce"
require "lrama/state/shift"
require "lrama/state/resolved_conflict"
module Lrama
class State
# * symbol: A symbol under discussion
# * reduce: A reduce under discussion
# * which: For which a conflict is resolved. :shift, :reduce or :error (for nonassociative)
ResolvedConflict = Struct.new(:symbol, :reduce, :which, :same_prec, keyword_init: true) do
def report_message
s = symbol.display_name
r = reduce.rule.precedence_sym.display_name
case
when which == :shift && same_prec
msg = "resolved as #{which} (%right #{s})"
when which == :shift
msg = "resolved as #{which} (#{r} < #{s})"
when which == :reduce && same_prec
msg = "resolved as #{which} (%left #{s})"
when which == :reduce
msg = "resolved as #{which} (#{s} < #{r})"
when which == :error
msg = "resolved as an #{which} (%nonassoc #{s})"
else
raise "Unknown direction. #{self}"
end
"Conflict between rule #{reduce.rule.id} and token #{s} #{msg}."
end
end
Conflict = Struct.new(:symbols, :reduce, :type, keyword_init: true)
attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts,
@ -96,7 +71,7 @@ module Lrama
reduce.look_ahead = look_ahead
end
# Returns array of [nterm, next_state]
# Returns array of [Shift, next_state]
def nterm_transitions
return @nterm_transitions if @nterm_transitions
@ -111,7 +86,7 @@ module Lrama
@nterm_transitions
end
# Returns array of [term, next_state]
# Returns array of [Shift, next_state]
def term_transitions
return @term_transitions if @term_transitions

View File

@ -0,0 +1,29 @@
module Lrama
  class State
    # * symbol: A symbol under discussion
    # * reduce: A reduce under discussion
    # * which: For which a conflict is resolved. :shift, :reduce or :error (for nonassociative)
    class ResolvedConflict < Struct.new(:symbol, :reduce, :which, :same_prec, keyword_init: true)
      # Builds the one-line description of how the conflict was resolved.
      # Raises when `which` is none of :shift, :reduce or :error.
      def report_message
        token_name = symbol.display_name
        rule_prec_name = reduce.rule.precedence_sym.display_name

        resolution =
          case which
          when :shift
            if same_prec
              "resolved as #{which} (%right #{token_name})"
            else
              "resolved as #{which} (#{rule_prec_name} < #{token_name})"
            end
          when :reduce
            if same_prec
              "resolved as #{which} (%left #{token_name})"
            else
              "resolved as #{which} (#{token_name} < #{rule_prec_name})"
            end
          when :error
            "resolved as an #{which} (%nonassoc #{token_name})"
          else
            raise "Unknown direction. #{self}"
          end

        "Conflict between rule #{reduce.rule.id} and token #{token_name} #{resolution}."
      end
    end
  end
end

View File

@ -1,5 +1,6 @@
require "forwardable"
require "lrama/report"
require "lrama/report/duration"
require "lrama/states/item"
module Lrama
# States is passed to a template file
@ -11,46 +12,7 @@ module Lrama
include Lrama::Report::Duration
def_delegators "@grammar", :symbols, :terms, :nterms, :rules,
:accept_symbol, :eof_symbol, :find_symbol_by_s_value!
# TODO: Validate position is not over rule rhs
Item = Struct.new(:rule, :position, keyword_init: true) do
# Optimization for States#setup_state
def hash
[rule.id, position].hash
end
def rule_id
rule.id
end
def next_sym
rule.rhs[position]
end
def end_of_rule?
rule.rhs.count == position
end
def new_by_next_position
Item.new(rule: rule, position: position + 1)
end
def previous_sym
rule.rhs[position - 1]
end
def display_name
r = rule.rhs.map(&:display_name).insert(position, "").join(" ")
"#{r} (rule #{rule.id})"
end
# Right after position
def display_rest
r = rule.rhs[position..-1].map(&:display_name).join(" ")
". #{r} (rule #{rule.id})"
end
end
:accept_symbol, :eof_symbol, :undef_symbol, :find_symbol_by_s_value!
attr_reader :states, :reads_relation, :includes_relation, :lookback_relation

View File

@ -0,0 +1,43 @@
# TODO: Validate position is not over rule rhs
module Lrama
  class States
    # A rule together with a position inside its right-hand side.
    class Item < Struct.new(:rule, :position, keyword_init: true)
      # Optimization for States#setup_state
      #
      # Hashes on the rule id and the position only.
      def hash
        [rule_id, position].hash
      end

      def rule_id
        rule.id
      end

      # The rhs symbol at the position (nil at the end of the rule).
      def next_sym
        rule.rhs[position]
      end

      # True when the position equals the number of rhs symbols.
      def end_of_rule?
        rule.rhs.count == position
      end

      # A new Item for the same rule, one position further.
      def new_by_next_position
        Item.new(rule: rule, position: position.succ)
      end

      # The rhs symbol just before the position.
      def previous_sym
        rule.rhs[position - 1]
      end

      # All rhs display names joined by spaces, with an empty entry inserted
      # at the position, followed by the rule id.
      def display_name
        names = rule.rhs.map { |sym| sym.display_name }
        names.insert(position, "")
        "#{names.join(' ')} (rule #{rule.id})"
      end

      # Right after position
      def display_rest
        remaining = rule.rhs[position..-1]
        rest = remaining.map { |sym| sym.display_name }.join(" ")
        ". #{rest} (rule #{rule.id})"
      end
    end
  end
end

View File

@ -1,3 +1,3 @@
module Lrama
VERSION = "0.5.2".freeze
VERSION = "0.5.3".freeze
end