[ruby/rdoc] Refactor RDoc::Markup::Parser#tokenize

Make verbatims text or newline only, and simplify `build_verbatim`.

https://github.com/ruby/rdoc/commit/41ceae93b3
This commit is contained in:
Nobuyoshi Nakada 2022-10-05 20:07:02 +09:00 committed by GitHub
parent 29862ce273
commit 21977b95e2
2 changed files with 116 additions and 115 deletions

View File

@ -272,44 +272,11 @@ class RDoc::Markup::Parser
end end
case type case type
when :HEADER then
line << '=' * data
_, _, peek_column, = peek_token
peek_column ||= column + data
indent = peek_column - column - data
line << ' ' * indent
when :RULE then
width = 2 + data
line << '-' * width
_, _, peek_column, = peek_token
peek_column ||= column + width
indent = peek_column - column - width
line << ' ' * indent
when :BREAK, :TEXT then when :BREAK, :TEXT then
line << data line << data
when :BLOCKQUOTE then else
line << '>>>' raise TypeError, "unexpected token under verbatim: #{type}"
peek_type, _, peek_column = peek_token
if peek_type != :NEWLINE and peek_column
line << ' ' * (peek_column - column - 3)
end
else # *LIST_TOKENS
list_marker = case type
when :BULLET then data
when :LABEL then "[#{data}]"
when :NOTE then "#{data}::"
else # :LALPHA, :NUMBER, :UALPHA
"#{data}."
end
line << list_marker
peek_type, _, peek_column = peek_token
unless peek_type == :NEWLINE then
peek_column ||= column + list_marker.length
indent = peek_column - column - list_marker.length
line << ' ' * indent
end
end end
end end
verbatim << line << "\n" unless line.empty? verbatim << line << "\n" unless line.empty?
@ -481,11 +448,37 @@ class RDoc::Markup::Parser
## ##
# Turns text +input+ into a stream of tokens # Turns text +input+ into a stream of tokens
def tokenize input def tokenize(input)
setup_scanner input setup_scanner input
margin = @s.pos[0]
tokenize_indented(margin)
tokenize_input(margin)
end
until @s.eos? do def newline!(pos = nil)
if pos or (@s.scan(/ *(?=\r?\n)/) and pos = @s.pos and @s.scan(/\r?\n/))
@tokens << [:NEWLINE, @s.matched, *pos]
@s.newline!
end
end
def tokenize_indented(column)
indent = / {#{column+1},}(?=\S)| *(?=\r?\n)/
while @s.scan(indent)
pos = @s.pos pos = @s.pos
if @s.scan(/(.+)(?=\r?\n)?/)
@tokens << [:TEXT, @s.matched, *pos]
end
newline! or break
end
end
def tokenize_input(margin)
column = 0
until @s.eos?
pos = @s.pos
break if pos[0] < (margin ||= pos[0])
# leading spaces will be reflected by the column of the next token # leading spaces will be reflected by the column of the next token
# the only thing we lose are trailing spaces at the end of the file # the only thing we lose are trailing spaces at the end of the file
@ -494,75 +487,84 @@ class RDoc::Markup::Parser
# note: after BULLET, LABEL, etc., # note: after BULLET, LABEL, etc.,
# indent will be the column of the next non-newline token # indent will be the column of the next non-newline token
@tokens << case case
# [CR]LF => :NEWLINE # [CR]LF => :NEWLINE
when @s.scan(/\r?\n/) then when @s.scan(/\r?\n/)
token = [:NEWLINE, @s.matched, *pos] newline!(pos)
@s.newline! next
token
# === text => :HEADER then :TEXT
when @s.scan(/(=+)(\s*)/) then
level = @s[1].length
header = [:HEADER, level, *pos]
if @s[2] =~ /^\r?\n/ then # === text => :HEADER then :TEXT
@s.unscan(@s[2]) when @s.scan(/(=+)(\s*)/)
header level = @s[1].length
else header = [:HEADER, level, *pos]
pos = @s.pos
@s.scan(/.*/)
@tokens << header
[:TEXT, @s.matched.sub(/\r$/, ''), *pos]
end
# --- (at least 3) and nothing else on the line => :RULE
when @s.scan(/(-{3,}) *\r?$/) then
[:RULE, @s[1].length - 2, *pos]
# * or - followed by white space and text => :BULLET
when @s.scan(/([*-]) +(\S)/) then
@s.unscan(@s[2])
[:BULLET, @s[1], *pos]
# A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
when @s.scan(/([a-z]|\d+)\. +(\S)/i) then
# FIXME if tab(s), the column will be wrong
# either support tabs everywhere by first expanding them to
# spaces, or assume that they will have been replaced
# before (and provide a check for that at least in debug
# mode)
list_label = @s[1]
@s.unscan(@s[2])
list_type =
case list_label
when /[a-z]/ then :LALPHA
when /[A-Z]/ then :UALPHA
when /\d/ then :NUMBER
else
raise ParseError, "BUG token #{list_label}"
end
[list_type, list_label, *pos]
# [text] followed by spaces or end of line => :LABEL
when @s.scan(/\[(.*?)\]( +|\r?$)/) then
[:LABEL, @s[1], *pos]
# text:: followed by spaces or end of line => :NOTE
when @s.scan(/(.*?)::( +|\r?$)/) then
[:NOTE, @s[1], *pos]
# >>> followed by end of line => :BLOCKQUOTE
when @s.scan(/>>> *(\w+)?$/) then
if word = @s[1]
@s.unscan(word)
end
[:BLOCKQUOTE, word, *pos]
# anything else: :TEXT
else
@s.scan(/(.*?)( )?\r?$/)
token = [:TEXT, @s[1], *pos]
if @s[2] then if @s[2] =~ /^\r?\n/
@tokens << token @s.unscan(@s[2])
[:BREAK, @s[2], pos[0] + @s[1].length, pos[1]] @tokens << header
else else
token pos = @s.pos
end @s.scan(/.*/)
end @tokens << header
@tokens << [:TEXT, @s.matched.sub(/\r$/, ''), *pos]
end
# --- (at least 3) and nothing else on the line => :RULE
when @s.scan(/(-{3,}) *\r?$/)
@tokens << [:RULE, @s[1].length - 2, *pos]
# * or - followed by white space and text => :BULLET
when @s.scan(/([*-]) +(?=\S)/)
@tokens << [:BULLET, @s[1], *pos]
tokenize_input(nil)
# A. text, a. text, 12. text => :UALPHA, :LALPHA, :NUMBER
when @s.scan(/([a-z]|\d+)\. +(?=\S)/i)
# FIXME if tab(s), the column will be wrong
# either support tabs everywhere by first expanding them to
# spaces, or assume that they will have been replaced
# before (and provide a check for that at least in debug
# mode)
list_label = @s[1]
list_type =
case list_label
when /[a-z]/ then :LALPHA
when /[A-Z]/ then :UALPHA
when /\d/ then :NUMBER
else
raise ParseError, "BUG token #{list_label}"
end
@tokens << [list_type, list_label, *pos]
tokenize_input(nil)
# [text] followed by spaces or end of line => :LABEL
when @s.scan(/\[(.*?)\]( +|\r?$)/)
@tokens << [:LABEL, @s[1], *pos]
tokenize_input(nil)
# text:: followed by spaces or end of line => :NOTE
when @s.scan(/(.*?)::( +|\r?$)/)
@tokens << [:NOTE, @s[1], *pos]
tokenize_input(nil)
# >>> followed by end of line => :BLOCKQUOTE
when @s.scan(/>>> *(\w+)?\r?$/)
@tokens << [:BLOCKQUOTE, @s[1], *pos]
newline!
tokenize_input(nil)
# anything else: :TEXT
else
column = pos[0]
@s.scan(/(.*?)( )?\r?$/)
@tokens << [:TEXT, @s[1], *pos]
if @s[2]
@tokens << [:BREAK, @s[2], pos[0] + @s[1].length, pos[1]]
end
if newline!
tokenize_indented(column)
end
end
end end
self self

View File

@ -1591,8 +1591,7 @@ Example heading:
[:TEXT, 'Example heading:', 0, 0], [:TEXT, 'Example heading:', 0, 0],
[:NEWLINE, "\n", 16, 0], [:NEWLINE, "\n", 16, 0],
[:NEWLINE, "\n", 0, 1], [:NEWLINE, "\n", 0, 1],
[:HEADER, 3, 3, 2], [:TEXT, '=== heading three', 3, 2],
[:TEXT, 'heading three', 7, 2],
[:NEWLINE, "\n", 20, 2], [:NEWLINE, "\n", 20, 2],
] ]
@ -1608,7 +1607,7 @@ Example heading:
expected = [ expected = [
[:TEXT, 'Verbatim section here that is double-underlined', 2, 0], [:TEXT, 'Verbatim section here that is double-underlined', 2, 0],
[:NEWLINE, "\n", 49, 0], [:NEWLINE, "\n", 49, 0],
[:HEADER, 47, 2, 1], [:TEXT, '='*47, 2, 1],
[:NEWLINE, "\n", 49, 1], [:NEWLINE, "\n", 49, 1],
] ]
@ -1624,14 +1623,14 @@ Example heading:
STR STR
expected = [ expected = [
[:TEXT, 'A', 2, 0], [:TEXT, 'A', 2, 0],
[:NEWLINE, "\n", 3, 0], [:NEWLINE, "\n", 3, 0],
[:TEXT, 'b', 4, 1], [:TEXT, 'b', 4, 1],
[:NEWLINE, "\n", 5, 1], [:NEWLINE, "\n", 5, 1],
[:HEADER, 47, 2, 2], [:TEXT, '='*47, 2, 2],
[:NEWLINE, "\n", 49, 2], [:NEWLINE, "\n", 49, 2],
[:TEXT, 'c', 4, 3], [:TEXT, 'c', 4, 3],
[:NEWLINE, "\n", 5, 3], [:NEWLINE, "\n", 5, 3],
] ]
assert_equal expected, @RMP.tokenize(str) assert_equal expected, @RMP.tokenize(str)