[ruby/rdoc] Revert "Refactor RDoc::Markup::Parser#tokenize"

This reverts commit https://github.com/ruby/rdoc/commit/41ceae93b3bc.

https://github.com/ruby/rdoc/commit/5d2c47e8b8
Nobuyoshi Nakada, 2022-11-28 04:19:01 +09:00 (committed by git)
parent 21977b95e2
commit 5b1db79129
2 changed files with 115 additions and 116 deletions
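
Both the reverted helper-based tokenizer and the restored single-pass implementation emit tokens as [type, data, column, line] tuples, which is the shape the test expectations below assert against; what the revert changes is which token types indented text produces. A minimal usage sketch, assuming the RDoc::Markup::Parser.tokenize class-level helper that the test file drives through @RMP:

    require 'rdoc'

    # Each token is a [type, data, column, line] tuple, matching the
    # expectations asserted in the test file further down.
    RDoc::Markup::Parser.tokenize("= Heading\n").each { |token| p token }
    # Illustrative output:
    #   [:HEADER, 1, 0, 0]
    #   [:TEXT, "Heading", 2, 0]
    #   [:NEWLINE, "\n", 9, 0]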

lib/rdoc/markup/parser.rb

@@ -272,11 +272,44 @@ class RDoc::Markup::Parser
       end
       case type
+      when :HEADER then
+        line << '=' * data
+        _, _, peek_column, = peek_token
+        peek_column ||= column + data
+        indent = peek_column - column - data
+        line << ' ' * indent
+      when :RULE then
+        width = 2 + data
+        line << '-' * width
+        _, _, peek_column, = peek_token
+        peek_column ||= column + width
+        indent = peek_column - column - width
+        line << ' ' * indent
       when :BREAK, :TEXT then
         line << data
-      else
-        raise TypeError, "unexpected token under verbatim: #{type}"
+      when :BLOCKQUOTE then
+        line << '>>>'
+        peek_type, _, peek_column = peek_token
+        if peek_type != :NEWLINE and peek_column
+          line << ' ' * (peek_column - column - 3)
+        end
+      else # *LIST_TOKENS
+        list_marker = case type
+                      when :BULLET then data
+                      when :LABEL then "[#{data}]"
+                      when :NOTE then "#{data}::"
+                      else # :LALPHA, :NUMBER, :UALPHA
+                        "#{data}."
+                      end
+        line << list_marker
+        peek_type, _, peek_column = peek_token
+        unless peek_type == :NEWLINE then
+          peek_column ||= column + list_marker.length
+          indent = peek_column - column - list_marker.length
+          line << ' ' * indent
+        end
       end
     end
     verbatim << line << "\n" unless line.empty?
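
The restored branches above let build_verbatim re-render markup-looking tokens that land inside a verbatim block as their literal text, since a token such as :HEADER carries only its level and position. A standalone sketch of that arithmetic (not rdoc API; the values are taken from the "=== heading three" test case further down):

    # Re-rendering a level-3 :HEADER token found at column 3 inside a
    # verbatim block, where the following text resumes at column 7.
    data, column, peek_column = 3, 3, 7

    line = +''
    line << '=' * data                    # "==="
    indent = peek_column - column - data  # 7 - 3 - 3 => 1
    line << ' ' * indent                  # pad back to the text's column
    line                                  # => "=== "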
@@ -448,37 +481,11 @@ class RDoc::Markup::Parser
   ##
   # Turns text +input+ into a stream of tokens
-  def tokenize(input)
+  def tokenize input
     setup_scanner input
-    margin = @s.pos[0]
-    tokenize_indented(margin)
-    tokenize_input(margin)
-  end
-  def newline!(pos = nil)
-    if pos or (@s.scan(/ *(?=\r?\n)/) and pos = @s.pos and @s.scan(/\r?\n/))
-      @tokens << [:NEWLINE, @s.matched, *pos]
-      @s.newline!
-    end
-  end
-  def tokenize_indented(column)
-    indent = / {#{column+1},}(?=\S)| *(?=\r?\n)/
-    while @s.scan(indent)
-      until @s.eos? do
-        pos = @s.pos
-        if @s.scan(/(.+)(?=\r?\n)?/)
-          @tokens << [:TEXT, @s.matched, *pos]
-        end
-        newline! or break
-      end
-    end
-  end
-  def tokenize_input(margin)
-    column = 0
     until @s.eos?
       pos = @s.pos
-      break if pos[0] < (margin ||= pos[0])
       # leading spaces will be reflected by the column of the next token
       # the only thing we loose are trailing spaces at the end of the file
@@ -487,84 +494,75 @@ class RDoc::Markup::Parser
       # note: after BULLET, LABEL, etc.,
       # indent will be the column of the next non-newline token
-      case
-      # [CR]LF => :NEWLINE
-      when @s.scan(/\r?\n/)
-        newline!(pos)
-        next
+      @tokens << case
+                 # [CR]LF => :NEWLINE
+                 when @s.scan(/\r?\n/) then
+                   token = [:NEWLINE, @s.matched, *pos]
+                   @s.newline!
+                   token
+                 # === text => :HEADER then :TEXT
+                 when @s.scan(/(=+)(\s*)/) then
+                   level = @s[1].length
+                   header = [:HEADER, level, *pos]
-      # === text => :HEADER then :TEXT
-      when @s.scan(/(=+)(\s*)/)
-        level = @s[1].length
-        header = [:HEADER, level, *pos]
+                   if @s[2] =~ /^\r?\n/ then
+                     @s.unscan(@s[2])
+                     header
+                   else
+                     pos = @s.pos
+                     @s.scan(/.*/)
+                     @tokens << header
+                     [:TEXT, @s.matched.sub(/\r$/, ''), *pos]
+                   end
+                 # --- (at least 3) and nothing else on the line => :RULE
+                 when @s.scan(/(-{3,}) *\r?$/) then
+                   [:RULE, @s[1].length - 2, *pos]
+                 # * or - followed by white space and text => :BULLET
+                 when @s.scan(/([*-]) +(\S)/) then
+                   @s.unscan(@s[2])
+                   [:BULLET, @s[1], *pos]
+                 # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
+                 when @s.scan(/([a-z]|\d+)\. +(\S)/i) then
+                   # FIXME if tab(s), the column will be wrong
+                   # either support tabs everywhere by first expanding them to
+                   # spaces, or assume that they will have been replaced
+                   # before (and provide a check for that at least in debug
+                   # mode)
+                   list_label = @s[1]
+                   @s.unscan(@s[2])
+                   list_type =
+                     case list_label
+                     when /[a-z]/ then :LALPHA
+                     when /[A-Z]/ then :UALPHA
+                     when /\d/ then :NUMBER
+                     else
+                       raise ParseError, "BUG token #{list_label}"
+                     end
+                   [list_type, list_label, *pos]
+                 # [text] followed by spaces or end of line => :LABEL
+                 when @s.scan(/\[(.*?)\]( +|\r?$)/) then
+                   [:LABEL, @s[1], *pos]
+                 # text:: followed by spaces or end of line => :NOTE
+                 when @s.scan(/(.*?)::( +|\r?$)/) then
+                   [:NOTE, @s[1], *pos]
+                 # >>> followed by end of line => :BLOCKQUOTE
+                 when @s.scan(/>>> *(\w+)?$/) then
+                   if word = @s[1]
+                     @s.unscan(word)
+                   end
+                   [:BLOCKQUOTE, word, *pos]
+                 # anything else: :TEXT
+                 else
+                   @s.scan(/(.*?)( )?\r?$/)
+                   token = [:TEXT, @s[1], *pos]
-        if @s[2] =~ /^\r?\n/
-          @s.unscan(@s[2])
-          @tokens << header
-        else
-          pos = @s.pos
-          @s.scan(/.*/)
-          @tokens << header
-          @tokens << [:TEXT, @s.matched.sub(/\r$/, ''), *pos]
-        end
-      # --- (at least 3) and nothing else on the line => :RULE
-      when @s.scan(/(-{3,}) *\r?$/)
-        @tokens << [:RULE, @s[1].length - 2, *pos]
-      # * or - followed by white space and text => :BULLET
-      when @s.scan(/([*-]) +(?=\S)/)
-        @tokens << [:BULLET, @s[1], *pos]
-        tokenize_input(nil)
-      # A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
-      when @s.scan(/([a-z]|\d+)\. +(?=\S)/i)
-        # FIXME if tab(s), the column will be wrong
-        # either support tabs everywhere by first expanding them to
-        # spaces, or assume that they will have been replaced
-        # before (and provide a check for that at least in debug
-        # mode)
-        list_label = @s[1]
-        list_type =
-          case list_label
-          when /[a-z]/ then :LALPHA
-          when /[A-Z]/ then :UALPHA
-          when /\d/ then :NUMBER
-          else
-            raise ParseError, "BUG token #{list_label}"
-          end
-        @tokens << [list_type, list_label, *pos]
-        tokenize_input(nil)
-      # [text] followed by spaces or end of line => :LABEL
-      when @s.scan(/\[(.*?)\]( +|\r?$)/)
-        @tokens << [:LABEL, @s[1], *pos]
-        tokenize_input(nil)
-      # text:: followed by spaces or end of line => :NOTE
-      when @s.scan(/(.*?)::( +|\r?$)/)
-        @tokens << [:NOTE, @s[1], *pos]
-        tokenize_input(nil)
-      # >>> followed by end of line => :BLOCKQUOTE
-      when @s.scan(/>>> *(\w+)?\r?$/)
-        @tokens << [:BLOCKQUOTE, @s[1], *pos]
-        newline!
-        tokenize_input(nil)
-      # anything else: :TEXT
-      else
-        column = pos[0]
-        @s.scan(/(.*?)( )?\r?$/)
-        @tokens << [:TEXT, @s[1], *pos]
-        if @s[2]
-          @tokens << [:BREAK, @s[2], pos[0] + @s[1].length, pos[1]]
-        end
-        if newline!
-          tokenize_indented(column)
-        end
-      end
+                   if @s[2] then
+                     @tokens << token
+                     [:BREAK, @s[2], pos[0] + @s[1].length, pos[1]]
+                   else
+                     token
+                   end
+                 end
     end
     self
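
The visible effect of restoring the single-pass #tokenize shows up in the test changes below: an indented heading is tokenized as markup again (a :HEADER followed by :TEXT) instead of being collapsed into one verbatim :TEXT token, and the build_verbatim hunk above is what turns those tokens back into literal text. A hedged sketch matching the first updated expectation:

    require 'rdoc'

    str = "Example heading:\n\n   === heading three\n"
    tokens = RDoc::Markup::Parser.tokenize(str)
    # Per the updated expectation below, tokens[3] and tokens[4] should be
    # [:HEADER, 3, 3, 2] and [:TEXT, "heading three", 7, 2] after the revert.
    tokens.each { |token| p token }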

test/rdoc/test_rdoc_markup_parser.rb

@@ -1591,7 +1591,8 @@ Example heading:
       [:TEXT, 'Example heading:', 0, 0],
       [:NEWLINE, "\n", 16, 0],
       [:NEWLINE, "\n", 0, 1],
-      [:TEXT, '=== heading three', 3, 2],
+      [:HEADER, 3, 3, 2],
+      [:TEXT, 'heading three', 7, 2],
       [:NEWLINE, "\n", 20, 2],
     ]
@@ -1607,7 +1608,7 @@ Example heading:
     expected = [
       [:TEXT, 'Verbatim section here that is double-underlined', 2, 0],
       [:NEWLINE, "\n", 49, 0],
-      [:TEXT, '='*47, 2, 1],
+      [:HEADER, 47, 2, 1],
       [:NEWLINE, "\n", 49, 1],
     ]
@@ -1623,14 +1624,14 @@ Example heading:
     STR
     expected = [
-      [:TEXT, 'A', 2, 0],
-      [:NEWLINE, "\n", 3, 0],
-      [:TEXT, 'b', 4, 1],
-      [:NEWLINE, "\n", 5, 1],
-      [:TEXT, '='*47, 2, 2],
-      [:NEWLINE, "\n", 49, 2],
-      [:TEXT, 'c', 4, 3],
-      [:NEWLINE, "\n", 5, 3],
+      [:TEXT, 'A', 2, 0],
+      [:NEWLINE, "\n", 3, 0],
+      [:TEXT, 'b', 4, 1],
+      [:NEWLINE, "\n", 5, 1],
+      [:HEADER, 47, 2, 2],
+      [:NEWLINE, "\n", 49, 2],
+      [:TEXT, 'c', 4, 3],
+      [:NEWLINE, "\n", 5, 3],
     ]
     assert_equal expected, @RMP.tokenize(str)
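
The last two hunks make the same point for a verbatim block whose second line is a 47-character = underline: the restored tokenizer reports it as a level-47 :HEADER token, and the build_verbatim change in the first hunk is what renders it back as literal text. A small hedged check of the token types:

    require 'rdoc'

    str = "  Verbatim section here that is double-underlined\n  #{'=' * 47}\n"
    p RDoc::Markup::Parser.tokenize(str).map(&:first)
    # Per the updated expectation above this should print:
    #   [:TEXT, :NEWLINE, :HEADER, :NEWLINE]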