[ruby/rdoc] Refine regexp usages and reduce substring allocations

https://github.com/ruby/rdoc/commit/a976fb9d39
2021-12-09 00:10:33 +09:00 · 2021-12-09 00:10:33 +09:00 · 511864d1a7
commit 511864d1a7
parent 1b67c58f41
3 changed files with 27 additions and 31 deletions
--- a/lib/rdoc/generator/markup.rb
+++ b/lib/rdoc/generator/markup.rb
@ -109,7 +109,7 @@ class RDoc::MethodAttr
    lines.shift if src =~ /\A.*#\ *File/i # remove '# File' comment
    lines.each do |line|
      if line =~ /^ *(?=\S)/
-        n = $&.length
+        n = $~.end(0)
        indent = n if n < indent
        break if n == 0
      end
--- a/lib/rdoc/markup/attribute_manager.rb
+++ b/lib/rdoc/markup/attribute_manager.rb
@ -2,6 +2,17 @@
 ##
 # Manages changes of attributes in a block of text
 unless MatchData.method_defined?(:match_length)
  using Module.new {
    refine(MatchData) {
      def match_length(nth)
        b, e = offset(nth)
        e - b if b
      end
    }
  }
 end
 class RDoc::Markup::AttributeManager
  ##
@ -153,16 +164,17 @@ class RDoc::Markup::AttributeManager
    tags = "[#{tags.join("")}](?!#{PROTECT_ATTR})"
    all_tags = "[#{@matching_word_pairs.keys.join("")}](?!#{PROTECT_ATTR})"
-    re = /(^|\W|#{all_tags})(#{tags})(\2*[#\\]?[\w:#{PROTECT_ATTR}.\/\[\]-]+?\S?)\2(?!\2)(#{all_tags}|\W|$)/
+    re = /(?:^|\W|#{all_tags})\K(#{tags})(\1*[#\\]?[\w:#{PROTECT_ATTR}.\/\[\]-]+?\S?)\1(?!\1)(?=#{all_tags}|\W|$)/
    1 while str.gsub!(re) { |orig|
-      attr = @matching_word_pairs[$2]
+      a, w = (m = $~).values_at(1, 2)
-      attr_updated = attrs.set_attrs($`.length + $1.length + $2.length, $3.length, attr)
+      attr = @matching_word_pairs[a]
-      if attr_updated
+      if attrs.set_attrs(m.begin(2), w.length, attr)
-        $1 + NULL * $2.length + $3 + NULL * $2.length + $4
+        a = NULL * a.length
      else
-        $1 + NON_PRINTING_START + $2 + NON_PRINTING_END + $3 + NON_PRINTING_START + $2 + NON_PRINTING_END + $4
+        a = NON_PRINTING_START + a + NON_PRINTING_END
      end
      a + w + a
    }
    str.delete!(NON_PRINTING_START + NON_PRINTING_END)
  end
@ -173,9 +185,10 @@ class RDoc::Markup::AttributeManager
      @word_pair_map.each do |regexp, attr|
        next unless exclusive == exclusive?(attr)
        1 while str.gsub!(regexp) { |orig|
-          updated = attrs.set_attrs($`.length + $1.length, $2.length, attr)
+          w = (m = ($~))[2]
          updated = attrs.set_attrs(m.begin(2), w.length, attr)
          if updated
-            NULL * $1.length + $2 + NULL * $3.length
+            NULL * m.match_length(1) + w + NULL * m.match_length(3)
          else
            orig
          end
@ -194,9 +207,9 @@ class RDoc::Markup::AttributeManager
    1 while str.gsub!(/<(#{tags})>(.*?)<\/\1>/i) { |orig|
      attr = @html_tags[$1.downcase]
-      html_length = $1.length + 2
+      html_length = $~.match_length(1) + 2 # "<>".length
      seq = NULL * html_length
-      attrs.set_attrs($`.length + html_length, $2.length, attr)
+      attrs.set_attrs($~.begin(2), $~.match_length(2), attr)
      seq + $2 + seq + NULL
    }
  end
--- a/lib/rdoc/markup/to_rdoc.rb
+++ b/lib/rdoc/markup/to_rdoc.rb
@ -330,31 +330,14 @@ class RDoc::Markup::ToRdoc < RDoc::Markup::Formatter
    text_len = 20 if text_len < 20
    re = /^(.{0,#{text_len}})[ \n]/
    next_prefix = ' ' * @indent
    prefix = @prefix || next_prefix
    @prefix = nil
-    @res << prefix
+    text.scan(/\G(?:([^ \n]{#{text_len}})(?=[^ \n])|(.{1,#{text_len}})(?:[ \n]|\z))/) do
-
+      @res << prefix << ($1 || $2) << "\n"
-    while text.length > text_len
+      prefix = next_prefix
      if text =~ re then
        @res << $1
        text.slice!(0, $&.length)
      else
        @res << text.slice!(0, text_len)
      end
      @res << "\n" << next_prefix
    end
    if text.empty? then
      @res.pop
      @res.pop
    else
      @res << text
      @res << "\n"
    end
  end