[ruby/irb] Deprecate RubyLex and warn about referencing to it
(https://github.com/ruby/irb/pull/692) `RubyLex` has always been a private component of IRB, so we should explicitly discourage usages of it. Also, it should be placed under the `IRB` module like other components. https://github.com/ruby/irb/commit/069b5625f7
This commit is contained in:
parent
20927a89c2
commit
e1d7066a5f
@ -8,489 +8,494 @@ require "ripper"
|
||||
require "jruby" if RUBY_ENGINE == "jruby"
|
||||
require_relative "nesting_parser"
|
||||
|
||||
# :stopdoc:
|
||||
class RubyLex
|
||||
ASSIGNMENT_NODE_TYPES = [
|
||||
# Local, instance, global, class, constant, instance, and index assignment:
|
||||
# "foo = bar",
|
||||
# "@foo = bar",
|
||||
# "$foo = bar",
|
||||
# "@@foo = bar",
|
||||
# "::Foo = bar",
|
||||
# "a::Foo = bar",
|
||||
# "Foo = bar"
|
||||
# "foo.bar = 1"
|
||||
# "foo[1] = bar"
|
||||
:assign,
|
||||
module IRB
|
||||
# :stopdoc:
|
||||
class RubyLex
|
||||
ASSIGNMENT_NODE_TYPES = [
|
||||
# Local, instance, global, class, constant, instance, and index assignment:
|
||||
# "foo = bar",
|
||||
# "@foo = bar",
|
||||
# "$foo = bar",
|
||||
# "@@foo = bar",
|
||||
# "::Foo = bar",
|
||||
# "a::Foo = bar",
|
||||
# "Foo = bar"
|
||||
# "foo.bar = 1"
|
||||
# "foo[1] = bar"
|
||||
:assign,
|
||||
|
||||
# Operation assignment:
|
||||
# "foo += bar"
|
||||
# "foo -= bar"
|
||||
# "foo ||= bar"
|
||||
# "foo &&= bar"
|
||||
:opassign,
|
||||
# Operation assignment:
|
||||
# "foo += bar"
|
||||
# "foo -= bar"
|
||||
# "foo ||= bar"
|
||||
# "foo &&= bar"
|
||||
:opassign,
|
||||
|
||||
# Multiple assignment:
|
||||
# "foo, bar = 1, 2
|
||||
:massign,
|
||||
]
|
||||
# Multiple assignment:
|
||||
# "foo, bar = 1, 2
|
||||
:massign,
|
||||
]
|
||||
|
||||
class TerminateLineInput < StandardError
|
||||
def initialize
|
||||
super("Terminate Line Input")
|
||||
class TerminateLineInput < StandardError
|
||||
def initialize
|
||||
super("Terminate Line Input")
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
attr_reader :line_no
|
||||
attr_reader :line_no
|
||||
|
||||
def initialize(context)
|
||||
@context = context
|
||||
@line_no = 1
|
||||
@prompt = nil
|
||||
end
|
||||
|
||||
def self.compile_with_errors_suppressed(code, line_no: 1)
|
||||
begin
|
||||
result = yield code, line_no
|
||||
rescue ArgumentError
|
||||
# Ruby can issue an error for the code if there is an
|
||||
# incomplete magic comment for encoding in it. Force an
|
||||
# expression with a new line before the code in this
|
||||
# case to prevent magic comment handling. To make sure
|
||||
# line numbers in the lexed code remain the same,
|
||||
# decrease the line number by one.
|
||||
code = ";\n#{code}"
|
||||
line_no -= 1
|
||||
result = yield code, line_no
|
||||
def initialize(context)
|
||||
@context = context
|
||||
@line_no = 1
|
||||
@prompt = nil
|
||||
end
|
||||
result
|
||||
end
|
||||
|
||||
def set_prompt(&block)
|
||||
@prompt = block
|
||||
end
|
||||
|
||||
ERROR_TOKENS = [
|
||||
:on_parse_error,
|
||||
:compile_error,
|
||||
:on_assign_error,
|
||||
:on_alias_error,
|
||||
:on_class_name_error,
|
||||
:on_param_error
|
||||
]
|
||||
|
||||
def self.generate_local_variables_assign_code(local_variables)
|
||||
"#{local_variables.join('=')}=nil;" unless local_variables.empty?
|
||||
end
|
||||
|
||||
# Some part of the code is not included in Ripper's token.
|
||||
# Example: DATA part, token after heredoc_beg when heredoc has unclosed embexpr.
|
||||
# With interpolated tokens, tokens.map(&:tok).join will be equal to code.
|
||||
def self.interpolate_ripper_ignored_tokens(code, tokens)
|
||||
line_positions = [0]
|
||||
code.lines.each do |line|
|
||||
line_positions << line_positions.last + line.bytesize
|
||||
def self.compile_with_errors_suppressed(code, line_no: 1)
|
||||
begin
|
||||
result = yield code, line_no
|
||||
rescue ArgumentError
|
||||
# Ruby can issue an error for the code if there is an
|
||||
# incomplete magic comment for encoding in it. Force an
|
||||
# expression with a new line before the code in this
|
||||
# case to prevent magic comment handling. To make sure
|
||||
# line numbers in the lexed code remain the same,
|
||||
# decrease the line number by one.
|
||||
code = ";\n#{code}"
|
||||
line_no -= 1
|
||||
result = yield code, line_no
|
||||
end
|
||||
result
|
||||
end
|
||||
prev_byte_pos = 0
|
||||
interpolated = []
|
||||
prev_line = 1
|
||||
tokens.each do |t|
|
||||
line, col = t.pos
|
||||
byte_pos = line_positions[line - 1] + col
|
||||
if prev_byte_pos < byte_pos
|
||||
tok = code.byteslice(prev_byte_pos...byte_pos)
|
||||
|
||||
def set_prompt(&block)
|
||||
@prompt = block
|
||||
end
|
||||
|
||||
ERROR_TOKENS = [
|
||||
:on_parse_error,
|
||||
:compile_error,
|
||||
:on_assign_error,
|
||||
:on_alias_error,
|
||||
:on_class_name_error,
|
||||
:on_param_error
|
||||
]
|
||||
|
||||
def self.generate_local_variables_assign_code(local_variables)
|
||||
"#{local_variables.join('=')}=nil;" unless local_variables.empty?
|
||||
end
|
||||
|
||||
# Some part of the code is not included in Ripper's token.
|
||||
# Example: DATA part, token after heredoc_beg when heredoc has unclosed embexpr.
|
||||
# With interpolated tokens, tokens.map(&:tok).join will be equal to code.
|
||||
def self.interpolate_ripper_ignored_tokens(code, tokens)
|
||||
line_positions = [0]
|
||||
code.lines.each do |line|
|
||||
line_positions << line_positions.last + line.bytesize
|
||||
end
|
||||
prev_byte_pos = 0
|
||||
interpolated = []
|
||||
prev_line = 1
|
||||
tokens.each do |t|
|
||||
line, col = t.pos
|
||||
byte_pos = line_positions[line - 1] + col
|
||||
if prev_byte_pos < byte_pos
|
||||
tok = code.byteslice(prev_byte_pos...byte_pos)
|
||||
pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]]
|
||||
interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0)
|
||||
prev_line += tok.count("\n")
|
||||
end
|
||||
interpolated << t
|
||||
prev_byte_pos = byte_pos + t.tok.bytesize
|
||||
prev_line += t.tok.count("\n")
|
||||
end
|
||||
if prev_byte_pos < code.bytesize
|
||||
tok = code.byteslice(prev_byte_pos..)
|
||||
pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]]
|
||||
interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0)
|
||||
prev_line += tok.count("\n")
|
||||
end
|
||||
interpolated << t
|
||||
prev_byte_pos = byte_pos + t.tok.bytesize
|
||||
prev_line += t.tok.count("\n")
|
||||
end
|
||||
if prev_byte_pos < code.bytesize
|
||||
tok = code.byteslice(prev_byte_pos..)
|
||||
pos = [prev_line, prev_byte_pos - line_positions[prev_line - 1]]
|
||||
interpolated << Ripper::Lexer::Elem.new(pos, :on_ignored_by_ripper, tok, 0)
|
||||
end
|
||||
interpolated
|
||||
end
|
||||
|
||||
def self.ripper_lex_without_warning(code, context: nil)
|
||||
verbose, $VERBOSE = $VERBOSE, nil
|
||||
lvars_code = generate_local_variables_assign_code(context&.local_variables || [])
|
||||
original_code = code
|
||||
if lvars_code
|
||||
code = "#{lvars_code}\n#{code}"
|
||||
line_no = 0
|
||||
else
|
||||
line_no = 1
|
||||
interpolated
|
||||
end
|
||||
|
||||
compile_with_errors_suppressed(code, line_no: line_no) do |inner_code, line_no|
|
||||
lexer = Ripper::Lexer.new(inner_code, '-', line_no)
|
||||
tokens = []
|
||||
lexer.scan.each do |t|
|
||||
next if t.pos.first == 0
|
||||
prev_tk = tokens.last
|
||||
position_overlapped = prev_tk && t.pos[0] == prev_tk.pos[0] && t.pos[1] < prev_tk.pos[1] + prev_tk.tok.bytesize
|
||||
if position_overlapped
|
||||
tokens[-1] = t if ERROR_TOKENS.include?(prev_tk.event) && !ERROR_TOKENS.include?(t.event)
|
||||
else
|
||||
tokens << t
|
||||
end
|
||||
end
|
||||
interpolate_ripper_ignored_tokens(original_code, tokens)
|
||||
end
|
||||
ensure
|
||||
$VERBOSE = verbose
|
||||
end
|
||||
|
||||
def prompt(opens, continue, line_num_offset)
|
||||
ltype = ltype_from_open_tokens(opens)
|
||||
indent_level = calc_indent_level(opens)
|
||||
@prompt&.call(ltype, indent_level, opens.any? || continue, @line_no + line_num_offset)
|
||||
end
|
||||
|
||||
def check_code_state(code)
|
||||
tokens = self.class.ripper_lex_without_warning(code, context: @context)
|
||||
opens = IRB::NestingParser.open_tokens(tokens)
|
||||
[tokens, opens, code_terminated?(code, tokens, opens)]
|
||||
end
|
||||
|
||||
def code_terminated?(code, tokens, opens)
|
||||
case check_code_syntax(code)
|
||||
when :unrecoverable_error
|
||||
true
|
||||
when :recoverable_error
|
||||
false
|
||||
when :other_error
|
||||
opens.empty? && !should_continue?(tokens)
|
||||
when :valid
|
||||
!should_continue?(tokens)
|
||||
end
|
||||
end
|
||||
|
||||
def save_prompt_to_context_io(opens, continue, line_num_offset)
|
||||
# Implicitly saves prompt string to `@context.io.prompt`. This will be used in the next `@input.call`.
|
||||
prompt(opens, continue, line_num_offset)
|
||||
end
|
||||
|
||||
def increase_line_no(addition)
|
||||
@line_no += addition
|
||||
end
|
||||
|
||||
def assignment_expression?(code)
|
||||
# Try to parse the code and check if the last of possibly multiple
|
||||
# expressions is an assignment type.
|
||||
|
||||
# If the expression is invalid, Ripper.sexp should return nil which will
|
||||
# result in false being returned. Any valid expression should return an
|
||||
# s-expression where the second element of the top level array is an
|
||||
# array of parsed expressions. The first element of each expression is the
|
||||
# expression's type.
|
||||
verbose, $VERBOSE = $VERBOSE, nil
|
||||
code = "#{RubyLex.generate_local_variables_assign_code(@context.local_variables) || 'nil;'}\n#{code}"
|
||||
# Get the last node_type of the line. drop(1) is to ignore the local_variables_assign_code part.
|
||||
node_type = Ripper.sexp(code)&.dig(1)&.drop(1)&.dig(-1, 0)
|
||||
ASSIGNMENT_NODE_TYPES.include?(node_type)
|
||||
ensure
|
||||
$VERBOSE = verbose
|
||||
end
|
||||
|
||||
def should_continue?(tokens)
|
||||
# Look at the last token and check if IRB need to continue reading next line.
|
||||
# Example code that should continue: `a\` `a +` `a.`
|
||||
# Trailing spaces, newline, comments are skipped
|
||||
return true if tokens.last&.event == :on_sp && tokens.last.tok == "\\\n"
|
||||
|
||||
tokens.reverse_each do |token|
|
||||
case token.event
|
||||
when :on_sp, :on_nl, :on_ignored_nl, :on_comment, :on_embdoc_beg, :on_embdoc, :on_embdoc_end
|
||||
# Skip
|
||||
when :on_regexp_end, :on_heredoc_end, :on_semicolon
|
||||
# State is EXPR_BEG but should not continue
|
||||
return false
|
||||
else
|
||||
# Endless range should not continue
|
||||
return false if token.event == :on_op && token.tok.match?(/\A\.\.\.?\z/)
|
||||
|
||||
# EXPR_DOT and most of the EXPR_BEG should continue
|
||||
return token.state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_DOT)
|
||||
end
|
||||
end
|
||||
false
|
||||
end
|
||||
|
||||
def check_code_syntax(code)
|
||||
lvars_code = RubyLex.generate_local_variables_assign_code(@context.local_variables)
|
||||
code = "#{lvars_code}\n#{code}"
|
||||
|
||||
begin # check if parser error are available
|
||||
def self.ripper_lex_without_warning(code, context: nil)
|
||||
verbose, $VERBOSE = $VERBOSE, nil
|
||||
case RUBY_ENGINE
|
||||
when 'ruby'
|
||||
self.class.compile_with_errors_suppressed(code) do |inner_code, line_no|
|
||||
RubyVM::InstructionSequence.compile(inner_code, nil, nil, line_no)
|
||||
end
|
||||
when 'jruby'
|
||||
JRuby.compile_ir(code)
|
||||
lvars_code = generate_local_variables_assign_code(context&.local_variables || [])
|
||||
original_code = code
|
||||
if lvars_code
|
||||
code = "#{lvars_code}\n#{code}"
|
||||
line_no = 0
|
||||
else
|
||||
catch(:valid) do
|
||||
eval("BEGIN { throw :valid, true }\n#{code}")
|
||||
false
|
||||
end
|
||||
line_no = 1
|
||||
end
|
||||
rescue EncodingError
|
||||
# This is for a hash with invalid encoding symbol, {"\xAE": 1}
|
||||
:unrecoverable_error
|
||||
rescue SyntaxError => e
|
||||
case e.message
|
||||
when /unterminated (?:string|regexp) meets end of file/
|
||||
# "unterminated regexp meets end of file"
|
||||
#
|
||||
# example:
|
||||
# /
|
||||
#
|
||||
# "unterminated string meets end of file"
|
||||
#
|
||||
# example:
|
||||
# '
|
||||
return :recoverable_error
|
||||
when /syntax error, unexpected end-of-input/
|
||||
# "syntax error, unexpected end-of-input, expecting keyword_end"
|
||||
#
|
||||
# example:
|
||||
# if true
|
||||
# hoge
|
||||
# if false
|
||||
# fuga
|
||||
# end
|
||||
return :recoverable_error
|
||||
when /syntax error, unexpected keyword_end/
|
||||
# "syntax error, unexpected keyword_end"
|
||||
#
|
||||
# example:
|
||||
# if (
|
||||
# end
|
||||
#
|
||||
# example:
|
||||
# end
|
||||
return :unrecoverable_error
|
||||
when /syntax error, unexpected '\.'/
|
||||
# "syntax error, unexpected '.'"
|
||||
#
|
||||
# example:
|
||||
# .
|
||||
return :unrecoverable_error
|
||||
when /unexpected tREGEXP_BEG/
|
||||
# "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('"
|
||||
#
|
||||
# example:
|
||||
# method / f /
|
||||
return :unrecoverable_error
|
||||
else
|
||||
return :other_error
|
||||
|
||||
compile_with_errors_suppressed(code, line_no: line_no) do |inner_code, line_no|
|
||||
lexer = Ripper::Lexer.new(inner_code, '-', line_no)
|
||||
tokens = []
|
||||
lexer.scan.each do |t|
|
||||
next if t.pos.first == 0
|
||||
prev_tk = tokens.last
|
||||
position_overlapped = prev_tk && t.pos[0] == prev_tk.pos[0] && t.pos[1] < prev_tk.pos[1] + prev_tk.tok.bytesize
|
||||
if position_overlapped
|
||||
tokens[-1] = t if ERROR_TOKENS.include?(prev_tk.event) && !ERROR_TOKENS.include?(t.event)
|
||||
else
|
||||
tokens << t
|
||||
end
|
||||
end
|
||||
interpolate_ripper_ignored_tokens(original_code, tokens)
|
||||
end
|
||||
ensure
|
||||
$VERBOSE = verbose
|
||||
end
|
||||
:valid
|
||||
end
|
||||
|
||||
def calc_indent_level(opens)
|
||||
indent_level = 0
|
||||
opens.each_with_index do |t, index|
|
||||
case t.event
|
||||
when :on_heredoc_beg
|
||||
if opens[index + 1]&.event != :on_heredoc_beg
|
||||
if t.tok.match?(/^<<[~-]/)
|
||||
indent_level += 1
|
||||
else
|
||||
indent_level = 0
|
||||
def prompt(opens, continue, line_num_offset)
|
||||
ltype = ltype_from_open_tokens(opens)
|
||||
indent_level = calc_indent_level(opens)
|
||||
@prompt&.call(ltype, indent_level, opens.any? || continue, @line_no + line_num_offset)
|
||||
end
|
||||
|
||||
def check_code_state(code)
|
||||
tokens = self.class.ripper_lex_without_warning(code, context: @context)
|
||||
opens = NestingParser.open_tokens(tokens)
|
||||
[tokens, opens, code_terminated?(code, tokens, opens)]
|
||||
end
|
||||
|
||||
def code_terminated?(code, tokens, opens)
|
||||
case check_code_syntax(code)
|
||||
when :unrecoverable_error
|
||||
true
|
||||
when :recoverable_error
|
||||
false
|
||||
when :other_error
|
||||
opens.empty? && !should_continue?(tokens)
|
||||
when :valid
|
||||
!should_continue?(tokens)
|
||||
end
|
||||
end
|
||||
|
||||
def save_prompt_to_context_io(opens, continue, line_num_offset)
|
||||
# Implicitly saves prompt string to `@context.io.prompt`. This will be used in the next `@input.call`.
|
||||
prompt(opens, continue, line_num_offset)
|
||||
end
|
||||
|
||||
def increase_line_no(addition)
|
||||
@line_no += addition
|
||||
end
|
||||
|
||||
def assignment_expression?(code)
|
||||
# Try to parse the code and check if the last of possibly multiple
|
||||
# expressions is an assignment type.
|
||||
|
||||
# If the expression is invalid, Ripper.sexp should return nil which will
|
||||
# result in false being returned. Any valid expression should return an
|
||||
# s-expression where the second element of the top level array is an
|
||||
# array of parsed expressions. The first element of each expression is the
|
||||
# expression's type.
|
||||
verbose, $VERBOSE = $VERBOSE, nil
|
||||
code = "#{RubyLex.generate_local_variables_assign_code(@context.local_variables) || 'nil;'}\n#{code}"
|
||||
# Get the last node_type of the line. drop(1) is to ignore the local_variables_assign_code part.
|
||||
node_type = Ripper.sexp(code)&.dig(1)&.drop(1)&.dig(-1, 0)
|
||||
ASSIGNMENT_NODE_TYPES.include?(node_type)
|
||||
ensure
|
||||
$VERBOSE = verbose
|
||||
end
|
||||
|
||||
def should_continue?(tokens)
|
||||
# Look at the last token and check if IRB need to continue reading next line.
|
||||
# Example code that should continue: `a\` `a +` `a.`
|
||||
# Trailing spaces, newline, comments are skipped
|
||||
return true if tokens.last&.event == :on_sp && tokens.last.tok == "\\\n"
|
||||
|
||||
tokens.reverse_each do |token|
|
||||
case token.event
|
||||
when :on_sp, :on_nl, :on_ignored_nl, :on_comment, :on_embdoc_beg, :on_embdoc, :on_embdoc_end
|
||||
# Skip
|
||||
when :on_regexp_end, :on_heredoc_end, :on_semicolon
|
||||
# State is EXPR_BEG but should not continue
|
||||
return false
|
||||
else
|
||||
# Endless range should not continue
|
||||
return false if token.event == :on_op && token.tok.match?(/\A\.\.\.?\z/)
|
||||
|
||||
# EXPR_DOT and most of the EXPR_BEG should continue
|
||||
return token.state.anybits?(Ripper::EXPR_BEG | Ripper::EXPR_DOT)
|
||||
end
|
||||
end
|
||||
false
|
||||
end
|
||||
|
||||
def check_code_syntax(code)
|
||||
lvars_code = RubyLex.generate_local_variables_assign_code(@context.local_variables)
|
||||
code = "#{lvars_code}\n#{code}"
|
||||
|
||||
begin # check if parser error are available
|
||||
verbose, $VERBOSE = $VERBOSE, nil
|
||||
case RUBY_ENGINE
|
||||
when 'ruby'
|
||||
self.class.compile_with_errors_suppressed(code) do |inner_code, line_no|
|
||||
RubyVM::InstructionSequence.compile(inner_code, nil, nil, line_no)
|
||||
end
|
||||
when 'jruby'
|
||||
JRuby.compile_ir(code)
|
||||
else
|
||||
catch(:valid) do
|
||||
eval("BEGIN { throw :valid, true }\n#{code}")
|
||||
false
|
||||
end
|
||||
end
|
||||
when :on_tstring_beg, :on_regexp_beg, :on_symbeg, :on_backtick
|
||||
# No indent: "", //, :"", ``
|
||||
# Indent: %(), %r(), %i(), %x()
|
||||
indent_level += 1 if t.tok.start_with? '%'
|
||||
when :on_embdoc_beg
|
||||
indent_level = 0
|
||||
else
|
||||
indent_level += 1
|
||||
end
|
||||
end
|
||||
indent_level
|
||||
end
|
||||
|
||||
FREE_INDENT_TOKENS = %i[on_tstring_beg on_backtick on_regexp_beg on_symbeg]
|
||||
|
||||
def free_indent_token?(token)
|
||||
FREE_INDENT_TOKENS.include?(token&.event)
|
||||
end
|
||||
|
||||
# Calculates the difference of pasted code's indent and indent calculated from tokens
|
||||
def indent_difference(lines, line_results, line_index)
|
||||
loop do
|
||||
_tokens, prev_opens, _next_opens, min_depth = line_results[line_index]
|
||||
open_token = prev_opens.last
|
||||
if !open_token || (open_token.event != :on_heredoc_beg && !free_indent_token?(open_token))
|
||||
# If the leading whitespace is an indent, return the difference
|
||||
indent_level = calc_indent_level(prev_opens.take(min_depth))
|
||||
calculated_indent = 2 * indent_level
|
||||
actual_indent = lines[line_index][/^ */].size
|
||||
return actual_indent - calculated_indent
|
||||
elsif open_token.event == :on_heredoc_beg && open_token.tok.match?(/^<<[^-~]/)
|
||||
return 0
|
||||
end
|
||||
# If the leading whitespace is not an indent but part of a multiline token
|
||||
# Calculate base_indent of the multiline token's beginning line
|
||||
line_index = open_token.pos[0] - 1
|
||||
end
|
||||
end
|
||||
|
||||
def process_indent_level(tokens, lines, line_index, is_newline)
|
||||
line_results = IRB::NestingParser.parse_by_line(tokens)
|
||||
result = line_results[line_index]
|
||||
if result
|
||||
_tokens, prev_opens, next_opens, min_depth = result
|
||||
else
|
||||
# When last line is empty
|
||||
prev_opens = next_opens = line_results.last[2]
|
||||
min_depth = next_opens.size
|
||||
end
|
||||
|
||||
# To correctly indent line like `end.map do`, we use shortest open tokens on each line for indent calculation.
|
||||
# Shortest open tokens can be calculated by `opens.take(min_depth)`
|
||||
indent = 2 * calc_indent_level(prev_opens.take(min_depth))
|
||||
|
||||
preserve_indent = lines[line_index - (is_newline ? 1 : 0)][/^ */].size
|
||||
|
||||
prev_open_token = prev_opens.last
|
||||
next_open_token = next_opens.last
|
||||
|
||||
# Calculates base indent for pasted code on the line where prev_open_token is located
|
||||
# irb(main):001:1* if a # base_indent is 2, indent calculated from tokens is 0
|
||||
# irb(main):002:1* if b # base_indent is 6, indent calculated from tokens is 2
|
||||
# irb(main):003:0> c # base_indent is 6, indent calculated from tokens is 4
|
||||
if prev_open_token
|
||||
base_indent = [0, indent_difference(lines, line_results, prev_open_token.pos[0] - 1)].max
|
||||
else
|
||||
base_indent = 0
|
||||
end
|
||||
|
||||
if free_indent_token?(prev_open_token)
|
||||
if is_newline && prev_open_token.pos[0] == line_index
|
||||
# First newline inside free-indent token
|
||||
base_indent + indent
|
||||
else
|
||||
# Accept any number of indent inside free-indent token
|
||||
preserve_indent
|
||||
end
|
||||
elsif prev_open_token&.event == :on_embdoc_beg || next_open_token&.event == :on_embdoc_beg
|
||||
if prev_open_token&.event == next_open_token&.event
|
||||
# Accept any number of indent inside embdoc content
|
||||
preserve_indent
|
||||
else
|
||||
# =begin or =end
|
||||
0
|
||||
end
|
||||
elsif prev_open_token&.event == :on_heredoc_beg
|
||||
tok = prev_open_token.tok
|
||||
if prev_opens.size <= next_opens.size
|
||||
if is_newline && lines[line_index].empty? && line_results[line_index - 1][1].last != next_open_token
|
||||
# First line in heredoc
|
||||
tok.match?(/^<<[-~]/) ? base_indent + indent : indent
|
||||
elsif tok.match?(/^<<~/)
|
||||
# Accept extra indent spaces inside `<<~` heredoc
|
||||
[base_indent + indent, preserve_indent].max
|
||||
rescue EncodingError
|
||||
# This is for a hash with invalid encoding symbol, {"\xAE": 1}
|
||||
:unrecoverable_error
|
||||
rescue SyntaxError => e
|
||||
case e.message
|
||||
when /unterminated (?:string|regexp) meets end of file/
|
||||
# "unterminated regexp meets end of file"
|
||||
#
|
||||
# example:
|
||||
# /
|
||||
#
|
||||
# "unterminated string meets end of file"
|
||||
#
|
||||
# example:
|
||||
# '
|
||||
return :recoverable_error
|
||||
when /syntax error, unexpected end-of-input/
|
||||
# "syntax error, unexpected end-of-input, expecting keyword_end"
|
||||
#
|
||||
# example:
|
||||
# if true
|
||||
# hoge
|
||||
# if false
|
||||
# fuga
|
||||
# end
|
||||
return :recoverable_error
|
||||
when /syntax error, unexpected keyword_end/
|
||||
# "syntax error, unexpected keyword_end"
|
||||
#
|
||||
# example:
|
||||
# if (
|
||||
# end
|
||||
#
|
||||
# example:
|
||||
# end
|
||||
return :unrecoverable_error
|
||||
when /syntax error, unexpected '\.'/
|
||||
# "syntax error, unexpected '.'"
|
||||
#
|
||||
# example:
|
||||
# .
|
||||
return :unrecoverable_error
|
||||
when /unexpected tREGEXP_BEG/
|
||||
# "syntax error, unexpected tREGEXP_BEG, expecting keyword_do or '{' or '('"
|
||||
#
|
||||
# example:
|
||||
# method / f /
|
||||
return :unrecoverable_error
|
||||
else
|
||||
# Accept any number of indent inside other heredoc
|
||||
return :other_error
|
||||
end
|
||||
ensure
|
||||
$VERBOSE = verbose
|
||||
end
|
||||
:valid
|
||||
end
|
||||
|
||||
def calc_indent_level(opens)
|
||||
indent_level = 0
|
||||
opens.each_with_index do |t, index|
|
||||
case t.event
|
||||
when :on_heredoc_beg
|
||||
if opens[index + 1]&.event != :on_heredoc_beg
|
||||
if t.tok.match?(/^<<[~-]/)
|
||||
indent_level += 1
|
||||
else
|
||||
indent_level = 0
|
||||
end
|
||||
end
|
||||
when :on_tstring_beg, :on_regexp_beg, :on_symbeg, :on_backtick
|
||||
# No indent: "", //, :"", ``
|
||||
# Indent: %(), %r(), %i(), %x()
|
||||
indent_level += 1 if t.tok.start_with? '%'
|
||||
when :on_embdoc_beg
|
||||
indent_level = 0
|
||||
else
|
||||
indent_level += 1
|
||||
end
|
||||
end
|
||||
indent_level
|
||||
end
|
||||
|
||||
FREE_INDENT_TOKENS = %i[on_tstring_beg on_backtick on_regexp_beg on_symbeg]
|
||||
|
||||
def free_indent_token?(token)
|
||||
FREE_INDENT_TOKENS.include?(token&.event)
|
||||
end
|
||||
|
||||
# Calculates the difference of pasted code's indent and indent calculated from tokens
|
||||
def indent_difference(lines, line_results, line_index)
|
||||
loop do
|
||||
_tokens, prev_opens, _next_opens, min_depth = line_results[line_index]
|
||||
open_token = prev_opens.last
|
||||
if !open_token || (open_token.event != :on_heredoc_beg && !free_indent_token?(open_token))
|
||||
# If the leading whitespace is an indent, return the difference
|
||||
indent_level = calc_indent_level(prev_opens.take(min_depth))
|
||||
calculated_indent = 2 * indent_level
|
||||
actual_indent = lines[line_index][/^ */].size
|
||||
return actual_indent - calculated_indent
|
||||
elsif open_token.event == :on_heredoc_beg && open_token.tok.match?(/^<<[^-~]/)
|
||||
return 0
|
||||
end
|
||||
# If the leading whitespace is not an indent but part of a multiline token
|
||||
# Calculate base_indent of the multiline token's beginning line
|
||||
line_index = open_token.pos[0] - 1
|
||||
end
|
||||
end
|
||||
|
||||
def process_indent_level(tokens, lines, line_index, is_newline)
|
||||
line_results = NestingParser.parse_by_line(tokens)
|
||||
result = line_results[line_index]
|
||||
if result
|
||||
_tokens, prev_opens, next_opens, min_depth = result
|
||||
else
|
||||
# When last line is empty
|
||||
prev_opens = next_opens = line_results.last[2]
|
||||
min_depth = next_opens.size
|
||||
end
|
||||
|
||||
# To correctly indent line like `end.map do`, we use shortest open tokens on each line for indent calculation.
|
||||
# Shortest open tokens can be calculated by `opens.take(min_depth)`
|
||||
indent = 2 * calc_indent_level(prev_opens.take(min_depth))
|
||||
|
||||
preserve_indent = lines[line_index - (is_newline ? 1 : 0)][/^ */].size
|
||||
|
||||
prev_open_token = prev_opens.last
|
||||
next_open_token = next_opens.last
|
||||
|
||||
# Calculates base indent for pasted code on the line where prev_open_token is located
|
||||
# irb(main):001:1* if a # base_indent is 2, indent calculated from tokens is 0
|
||||
# irb(main):002:1* if b # base_indent is 6, indent calculated from tokens is 2
|
||||
# irb(main):003:0> c # base_indent is 6, indent calculated from tokens is 4
|
||||
if prev_open_token
|
||||
base_indent = [0, indent_difference(lines, line_results, prev_open_token.pos[0] - 1)].max
|
||||
else
|
||||
base_indent = 0
|
||||
end
|
||||
|
||||
if free_indent_token?(prev_open_token)
|
||||
if is_newline && prev_open_token.pos[0] == line_index
|
||||
# First newline inside free-indent token
|
||||
base_indent + indent
|
||||
else
|
||||
# Accept any number of indent inside free-indent token
|
||||
preserve_indent
|
||||
end
|
||||
elsif prev_open_token&.event == :on_embdoc_beg || next_open_token&.event == :on_embdoc_beg
|
||||
if prev_open_token&.event == next_open_token&.event
|
||||
# Accept any number of indent inside embdoc content
|
||||
preserve_indent
|
||||
else
|
||||
# =begin or =end
|
||||
0
|
||||
end
|
||||
elsif prev_open_token&.event == :on_heredoc_beg
|
||||
tok = prev_open_token.tok
|
||||
if prev_opens.size <= next_opens.size
|
||||
if is_newline && lines[line_index].empty? && line_results[line_index - 1][1].last != next_open_token
|
||||
# First line in heredoc
|
||||
tok.match?(/^<<[-~]/) ? base_indent + indent : indent
|
||||
elsif tok.match?(/^<<~/)
|
||||
# Accept extra indent spaces inside `<<~` heredoc
|
||||
[base_indent + indent, preserve_indent].max
|
||||
else
|
||||
# Accept any number of indent inside other heredoc
|
||||
preserve_indent
|
||||
end
|
||||
else
|
||||
# Heredoc close
|
||||
prev_line_indent_level = calc_indent_level(prev_opens)
|
||||
tok.match?(/^<<[~-]/) ? base_indent + 2 * (prev_line_indent_level - 1) : 0
|
||||
end
|
||||
else
|
||||
# Heredoc close
|
||||
prev_line_indent_level = calc_indent_level(prev_opens)
|
||||
tok.match?(/^<<[~-]/) ? base_indent + 2 * (prev_line_indent_level - 1) : 0
|
||||
base_indent + indent
|
||||
end
|
||||
else
|
||||
base_indent + indent
|
||||
end
|
||||
end
|
||||
|
||||
LTYPE_TOKENS = %i[
|
||||
on_heredoc_beg on_tstring_beg
|
||||
on_regexp_beg on_symbeg on_backtick
|
||||
on_symbols_beg on_qsymbols_beg
|
||||
on_words_beg on_qwords_beg
|
||||
]
|
||||
LTYPE_TOKENS = %i[
|
||||
on_heredoc_beg on_tstring_beg
|
||||
on_regexp_beg on_symbeg on_backtick
|
||||
on_symbols_beg on_qsymbols_beg
|
||||
on_words_beg on_qwords_beg
|
||||
]
|
||||
|
||||
def ltype_from_open_tokens(opens)
|
||||
start_token = opens.reverse_each.find do |tok|
|
||||
LTYPE_TOKENS.include?(tok.event)
|
||||
end
|
||||
return nil unless start_token
|
||||
|
||||
case start_token&.event
|
||||
when :on_tstring_beg
|
||||
case start_token&.tok
|
||||
when ?" then ?"
|
||||
when /^%.$/ then ?"
|
||||
when /^%Q.$/ then ?"
|
||||
when ?' then ?'
|
||||
when /^%q.$/ then ?'
|
||||
def ltype_from_open_tokens(opens)
|
||||
start_token = opens.reverse_each.find do |tok|
|
||||
LTYPE_TOKENS.include?(tok.event)
|
||||
end
|
||||
when :on_regexp_beg then ?/
|
||||
when :on_symbeg then ?:
|
||||
when :on_backtick then ?`
|
||||
when :on_qwords_beg then ?]
|
||||
when :on_words_beg then ?]
|
||||
when :on_qsymbols_beg then ?]
|
||||
when :on_symbols_beg then ?]
|
||||
when :on_heredoc_beg
|
||||
start_token&.tok =~ /<<[-~]?(['"`])\w+\1/
|
||||
$1 || ?"
|
||||
else
|
||||
nil
|
||||
end
|
||||
end
|
||||
return nil unless start_token
|
||||
|
||||
def check_termination_in_prev_line(code)
|
||||
tokens = self.class.ripper_lex_without_warning(code, context: @context)
|
||||
past_first_newline = false
|
||||
index = tokens.rindex do |t|
|
||||
# traverse first token before last line
|
||||
if past_first_newline
|
||||
if t.tok.include?("\n")
|
||||
true
|
||||
case start_token&.event
|
||||
when :on_tstring_beg
|
||||
case start_token&.tok
|
||||
when ?" then ?"
|
||||
when /^%.$/ then ?"
|
||||
when /^%Q.$/ then ?"
|
||||
when ?' then ?'
|
||||
when /^%q.$/ then ?'
|
||||
end
|
||||
elsif t.tok.include?("\n")
|
||||
past_first_newline = true
|
||||
false
|
||||
when :on_regexp_beg then ?/
|
||||
when :on_symbeg then ?:
|
||||
when :on_backtick then ?`
|
||||
when :on_qwords_beg then ?]
|
||||
when :on_words_beg then ?]
|
||||
when :on_qsymbols_beg then ?]
|
||||
when :on_symbols_beg then ?]
|
||||
when :on_heredoc_beg
|
||||
start_token&.tok =~ /<<[-~]?(['"`])\w+\1/
|
||||
$1 || ?"
|
||||
else
|
||||
false
|
||||
nil
|
||||
end
|
||||
end
|
||||
|
||||
if index
|
||||
first_token = nil
|
||||
last_line_tokens = tokens[(index + 1)..(tokens.size - 1)]
|
||||
last_line_tokens.each do |t|
|
||||
unless [:on_sp, :on_ignored_sp, :on_comment].include?(t.event)
|
||||
first_token = t
|
||||
break
|
||||
def check_termination_in_prev_line(code)
|
||||
tokens = self.class.ripper_lex_without_warning(code, context: @context)
|
||||
past_first_newline = false
|
||||
index = tokens.rindex do |t|
|
||||
# traverse first token before last line
|
||||
if past_first_newline
|
||||
if t.tok.include?("\n")
|
||||
true
|
||||
end
|
||||
elsif t.tok.include?("\n")
|
||||
past_first_newline = true
|
||||
false
|
||||
else
|
||||
false
|
||||
end
|
||||
end
|
||||
|
||||
if first_token && first_token.state != Ripper::EXPR_DOT
|
||||
tokens_without_last_line = tokens[0..index]
|
||||
code_without_last_line = tokens_without_last_line.map(&:tok).join
|
||||
opens_without_last_line = IRB::NestingParser.open_tokens(tokens_without_last_line)
|
||||
if code_terminated?(code_without_last_line, tokens_without_last_line, opens_without_last_line)
|
||||
return last_line_tokens.map(&:tok).join
|
||||
if index
|
||||
first_token = nil
|
||||
last_line_tokens = tokens[(index + 1)..(tokens.size - 1)]
|
||||
last_line_tokens.each do |t|
|
||||
unless [:on_sp, :on_ignored_sp, :on_comment].include?(t.event)
|
||||
first_token = t
|
||||
break
|
||||
end
|
||||
end
|
||||
|
||||
if first_token && first_token.state != Ripper::EXPR_DOT
|
||||
tokens_without_last_line = tokens[0..index]
|
||||
code_without_last_line = tokens_without_last_line.map(&:tok).join
|
||||
opens_without_last_line = NestingParser.open_tokens(tokens_without_last_line)
|
||||
if code_terminated?(code_without_last_line, tokens_without_last_line, opens_without_last_line)
|
||||
return last_line_tokens.map(&:tok).join
|
||||
end
|
||||
end
|
||||
end
|
||||
false
|
||||
end
|
||||
false
|
||||
end
|
||||
# :startdoc:
|
||||
end
|
||||
# :startdoc:
|
||||
|
||||
RubyLex = IRB::RubyLex
|
||||
Object.deprecate_constant(:RubyLex)
|
||||
|
@ -14,7 +14,7 @@ module TestIRB
|
||||
end
|
||||
|
||||
def parse_by_line(code)
|
||||
IRB::NestingParser.parse_by_line(RubyLex.ripper_lex_without_warning(code))
|
||||
IRB::NestingParser.parse_by_line(IRB::RubyLex.ripper_lex_without_warning(code))
|
||||
end
|
||||
|
||||
def test_open_tokens
|
||||
@ -27,7 +27,7 @@ module TestIRB
|
||||
x: "
|
||||
#{p(1, 2, 3
|
||||
EOS
|
||||
opens = IRB::NestingParser.open_tokens(RubyLex.ripper_lex_without_warning(code))
|
||||
opens = IRB::NestingParser.open_tokens(IRB::RubyLex.ripper_lex_without_warning(code))
|
||||
assert_equal(%w[class def if do { " #{ (], opens.map(&:tok))
|
||||
end
|
||||
|
||||
|
@ -20,7 +20,7 @@ module TestIRB
|
||||
#{⑤&<<C|⑥
|
||||
EOC
|
||||
ripper_tokens = Ripper.tokenize(code)
|
||||
rubylex_tokens = RubyLex.ripper_lex_without_warning(code)
|
||||
rubylex_tokens = IRB::RubyLex.ripper_lex_without_warning(code)
|
||||
# Assert no missing part
|
||||
assert_equal(code, rubylex_tokens.map(&:tok).join)
|
||||
# Assert ripper tokens are not removed
|
||||
@ -83,7 +83,7 @@ module TestIRB
|
||||
end
|
||||
|
||||
def test_broken_percent_literal
|
||||
tokens = RubyLex.ripper_lex_without_warning('%wwww')
|
||||
tokens = IRB::RubyLex.ripper_lex_without_warning('%wwww')
|
||||
pos_to_index = {}
|
||||
tokens.each_with_index { |t, i|
|
||||
assert_nil(pos_to_index[t.pos], "There is already another token in the position of #{t.inspect}.")
|
||||
@ -92,7 +92,7 @@ module TestIRB
|
||||
end
|
||||
|
||||
def test_broken_percent_literal_in_method
|
||||
tokens = RubyLex.ripper_lex_without_warning(<<~EOC.chomp)
|
||||
tokens = IRB::RubyLex.ripper_lex_without_warning(<<~EOC.chomp)
|
||||
def foo
|
||||
%wwww
|
||||
end
|
||||
@ -106,7 +106,7 @@ module TestIRB
|
||||
|
||||
def test_unterminated_code
|
||||
['do', '<<A'].each do |code|
|
||||
tokens = RubyLex.ripper_lex_without_warning(code)
|
||||
tokens = IRB::RubyLex.ripper_lex_without_warning(code)
|
||||
assert_equal(code, tokens.map(&:tok).join, "Cannot reconstruct code from tokens")
|
||||
error_tokens = tokens.map(&:event).grep(/error/)
|
||||
assert_empty(error_tokens, 'Error tokens must be ignored if there is corresponding non-error token')
|
||||
@ -115,7 +115,7 @@ module TestIRB
|
||||
|
||||
def test_unterminated_heredoc_string_literal
|
||||
['<<A;<<B', "<<A;<<B\n", "%W[\#{<<A;<<B", "%W[\#{<<A;<<B\n"].each do |code|
|
||||
tokens = RubyLex.ripper_lex_without_warning(code)
|
||||
tokens = IRB::RubyLex.ripper_lex_without_warning(code)
|
||||
string_literal = IRB::NestingParser.open_tokens(tokens).last
|
||||
assert_equal('<<A', string_literal&.tok)
|
||||
end
|
||||
@ -150,7 +150,7 @@ module TestIRB
|
||||
|
||||
def test_assignment_expression
|
||||
context = build_context
|
||||
ruby_lex = RubyLex.new(context)
|
||||
ruby_lex = IRB::RubyLex.new(context)
|
||||
|
||||
[
|
||||
"foo = bar",
|
||||
@ -194,7 +194,7 @@ module TestIRB
|
||||
|
||||
def test_assignment_expression_with_local_variable
|
||||
context = build_context
|
||||
ruby_lex = RubyLex.new(context)
|
||||
ruby_lex = IRB::RubyLex.new(context)
|
||||
code = "a /1;x=1#/"
|
||||
refute(ruby_lex.assignment_expression?(code), "#{code}: should not be an assignment expression")
|
||||
context.workspace.binding.eval('a = 1')
|
||||
@ -202,6 +202,14 @@ module TestIRB
|
||||
refute(ruby_lex.assignment_expression?(""), "empty code should not be an assignment expression")
|
||||
end
|
||||
|
||||
def test_initialising_the_old_top_level_ruby_lex
|
||||
_, err = capture_output do
|
||||
::RubyLex.new(nil)
|
||||
end
|
||||
|
||||
assert_match(/warning: constant ::RubyLex is deprecated/, err)
|
||||
end
|
||||
|
||||
private
|
||||
|
||||
def build_context(local_variables = nil)
|
||||
@ -239,7 +247,7 @@ module TestIRB
|
||||
def check_state(lines, local_variables: [])
|
||||
context = build_context(local_variables)
|
||||
code = lines.map { |l| "#{l}\n" }.join # code should end with "\n"
|
||||
ruby_lex = RubyLex.new(context)
|
||||
ruby_lex = IRB::RubyLex.new(context)
|
||||
tokens, opens, terminated = ruby_lex.check_code_state(code)
|
||||
indent_level = ruby_lex.calc_indent_level(opens)
|
||||
continue = ruby_lex.should_continue?(tokens)
|
||||
|
Loading…
x
Reference in New Issue
Block a user