[ruby/reline] Refactor Reline::Unicode ed_ vi_ em_ methods

(https://github.com/ruby/reline/pull/720)

* Refactor Reline::Unicode vi_ ed_ em_ methods
* Make Reline::Unicode's vi_ ed_ em_ methods encoding-safe

https://github.com/ruby/reline/commit/cdd7288978
2024-12-15 20:50:39 +09:00 · 2024-12-15 20:50:39 +09:00 · 2c57b87cc3
commit 2c57b87cc3
parent 5c372969ad
2 changed files with 143 additions and 360 deletions
--- a/lib/reline/unicode.rb
+++ b/lib/reline/unicode.rb
@ -262,375 +262,126 @@ class Reline::Unicode
  end
  def self.em_forward_word(line, byte_pointer)
-    byte_size = 0
+    gcs = line.byteslice(byte_pointer..).grapheme_clusters
-    while line.bytesize > (byte_pointer + byte_size)
+    nonwords = gcs.take_while { |c| !word_character?(c) }
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
+    words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) }
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
+    nonwords.sum(&:bytesize) + words.sum(&:bytesize)
      break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
      byte_size += size
    end
    while line.bytesize > (byte_pointer + byte_size)
      size = get_next_mbchar_size(line, byte_pointer + byte_size)
      mbchar = line.byteslice(byte_pointer + byte_size, size)
      break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
      byte_size += size
    end
    byte_size
  end
  def self.em_forward_word_with_capitalization(line, byte_pointer)
-    byte_size = 0
+    gcs = line.byteslice(byte_pointer..).grapheme_clusters
-    new_str = String.new
+    nonwords = gcs.take_while { |c| !word_character?(c) }
-    while line.bytesize > (byte_pointer + byte_size)
+    words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) }
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
+    [nonwords.sum(&:bytesize) + words.sum(&:bytesize), nonwords.join + words.join.capitalize]
      mbchar = line.byteslice(byte_pointer + byte_size, size)
      break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
      new_str += mbchar
      byte_size += size
    end
    first = true
    while line.bytesize > (byte_pointer + byte_size)
      size = get_next_mbchar_size(line, byte_pointer + byte_size)
      mbchar = line.byteslice(byte_pointer + byte_size, size)
      break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
      if first
        new_str += mbchar.upcase
        first = false
      else
        new_str += mbchar.downcase
      end
      byte_size += size
    end
    [byte_size, new_str]
  end
  def self.em_backward_word(line, byte_pointer)
-    byte_size = 0
+    gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
-    while 0 < (byte_pointer - byte_size)
+    nonwords = gcs.take_while { |c| !word_character?(c) }
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
+    words = gcs.drop(nonwords.size).take_while { |c| word_character?(c) }
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
+    nonwords.sum(&:bytesize) + words.sum(&:bytesize)
      break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
      byte_size += size
    end
    while 0 < (byte_pointer - byte_size)
      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
      break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
      byte_size += size
    end
    byte_size
  end
  def self.em_big_backward_word(line, byte_pointer)
-    byte_size = 0
+    gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
-    while 0 < (byte_pointer - byte_size)
+    spaces = gcs.take_while { |c| space_character?(c) }
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
+    nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) }
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
+    spaces.sum(&:bytesize) + nonspaces.sum(&:bytesize)
      break if mbchar =~ /\S/
      byte_size += size
    end
    while 0 < (byte_pointer - byte_size)
      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
      break if mbchar =~ /\s/
      byte_size += size
    end
    byte_size
  end
  def self.ed_transpose_words(line, byte_pointer)
-    right_word_start = nil
+    gcs = line.byteslice(0, byte_pointer).grapheme_clusters
-    size = get_next_mbchar_size(line, byte_pointer)
+    pos = gcs.size
-    mbchar = line.byteslice(byte_pointer, size)
+    gcs += line.byteslice(byte_pointer..).grapheme_clusters
-    if size.zero?
+    pos += 1 while pos < gcs.size && !word_character?(gcs[pos])
-      # ' aaa bbb [cursor]'
+    if pos == gcs.size # 'aaa  bbb [cursor] '
-      byte_size = 0
+      pos -= 1 while pos > 0 && !word_character?(gcs[pos - 1])
-      while 0 < (byte_pointer + byte_size)
+      second_word_end = gcs.size
-        size = get_prev_mbchar_size(line, byte_pointer + byte_size)
+    else # 'aaa  [cursor]bbb'
-        mbchar = line.byteslice(byte_pointer + byte_size - size, size)
+      pos += 1 while pos < gcs.size && word_character?(gcs[pos])
-        break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
+      second_word_end = pos
        byte_size -= size
      end
      while 0 < (byte_pointer + byte_size)
        size = get_prev_mbchar_size(line, byte_pointer + byte_size)
        mbchar = line.byteslice(byte_pointer + byte_size - size, size)
        break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
        byte_size -= size
      end
      right_word_start = byte_pointer + byte_size
      byte_size = 0
      while line.bytesize > (byte_pointer + byte_size)
        size = get_next_mbchar_size(line, byte_pointer + byte_size)
        mbchar = line.byteslice(byte_pointer + byte_size, size)
        break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
        byte_size += size
      end
      after_start = byte_pointer + byte_size
    elsif mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
      # ' aaa bb[cursor]b'
      byte_size = 0
      while 0 < (byte_pointer + byte_size)
        size = get_prev_mbchar_size(line, byte_pointer + byte_size)
        mbchar = line.byteslice(byte_pointer + byte_size - size, size)
        break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
        byte_size -= size
      end
      right_word_start = byte_pointer + byte_size
      byte_size = 0
      while line.bytesize > (byte_pointer + byte_size)
        size = get_next_mbchar_size(line, byte_pointer + byte_size)
        mbchar = line.byteslice(byte_pointer + byte_size, size)
        break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
        byte_size += size
      end
      after_start = byte_pointer + byte_size
    else
      byte_size = 0
      while (line.bytesize - 1) > (byte_pointer + byte_size)
        size = get_next_mbchar_size(line, byte_pointer + byte_size)
        mbchar = line.byteslice(byte_pointer + byte_size, size)
        break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
        byte_size += size
      end
      if (byte_pointer + byte_size) == (line.bytesize - 1)
        # ' aaa bbb [cursor] '
        after_start = line.bytesize
        while 0 < (byte_pointer + byte_size)
          size = get_prev_mbchar_size(line, byte_pointer + byte_size)
          mbchar = line.byteslice(byte_pointer + byte_size - size, size)
          break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
          byte_size -= size
        end
        while 0 < (byte_pointer + byte_size)
          size = get_prev_mbchar_size(line, byte_pointer + byte_size)
          mbchar = line.byteslice(byte_pointer + byte_size - size, size)
          break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
          byte_size -= size
        end
        right_word_start = byte_pointer + byte_size
      else
        # ' aaa [cursor] bbb '
        right_word_start = byte_pointer + byte_size
        while line.bytesize > (byte_pointer + byte_size)
          size = get_next_mbchar_size(line, byte_pointer + byte_size)
          mbchar = line.byteslice(byte_pointer + byte_size, size)
          break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
          byte_size += size
        end
        after_start = byte_pointer + byte_size
      end
    end
-    byte_size = right_word_start - byte_pointer
+    pos -= 1 while pos > 0 && word_character?(gcs[pos - 1])
-    while 0 < (byte_pointer + byte_size)
+    second_word_start = pos
-      size = get_prev_mbchar_size(line, byte_pointer + byte_size)
+    pos -= 1 while pos > 0 && !word_character?(gcs[pos - 1])
-      mbchar = line.byteslice(byte_pointer + byte_size - size, size)
+    first_word_end = pos
-      break if mbchar.encode(Encoding::UTF_8) =~ /\p{Word}/
+    pos -= 1 while pos > 0 && word_character?(gcs[pos - 1])
-      byte_size -= size
+    first_word_start = pos
    [first_word_start, first_word_end, second_word_start, second_word_end].map do |idx|
      gcs.take(idx).sum(&:bytesize)
    end
    middle_start = byte_pointer + byte_size
    byte_size = middle_start - byte_pointer
    while 0 < (byte_pointer + byte_size)
      size = get_prev_mbchar_size(line, byte_pointer + byte_size)
      mbchar = line.byteslice(byte_pointer + byte_size - size, size)
      break if mbchar.encode(Encoding::UTF_8) =~ /\P{Word}/
      byte_size -= size
    end
    left_word_start = byte_pointer + byte_size
    [left_word_start, middle_start, right_word_start, after_start]
  end
  def self.vi_big_forward_word(line, byte_pointer)
-    byte_size = 0
+    gcs = line.byteslice(byte_pointer..).grapheme_clusters
-    while (line.bytesize - 1) > (byte_pointer + byte_size)
+    nonspaces = gcs.take_while { |c| !space_character?(c) }
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
+    spaces = gcs.drop(nonspaces.size).take_while { |c| space_character?(c) }
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
+    nonspaces.sum(&:bytesize) + spaces.sum(&:bytesize)
      break if mbchar =~ /\s/
      byte_size += size
    end
    while (line.bytesize - 1) > (byte_pointer + byte_size)
      size = get_next_mbchar_size(line, byte_pointer + byte_size)
      mbchar = line.byteslice(byte_pointer + byte_size, size)
      break if mbchar =~ /\S/
      byte_size += size
    end
    byte_size
  end
  def self.vi_big_forward_end_word(line, byte_pointer)
-    if (line.bytesize - 1) > byte_pointer
+    gcs = line.byteslice(byte_pointer..).grapheme_clusters
-      size = get_next_mbchar_size(line, byte_pointer)
+    first = gcs.shift(1)
-      byte_size = size
+    spaces = gcs.take_while { |c| space_character?(c) }
-    else
+    nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) }
-      return 0
+    matched = spaces + nonspaces
-    end
+    matched.pop
-    while (line.bytesize - 1) > (byte_pointer + byte_size)
+    first.sum(&:bytesize) + matched.sum(&:bytesize)
      size = get_next_mbchar_size(line, byte_pointer + byte_size)
      mbchar = line.byteslice(byte_pointer + byte_size, size)
      break if mbchar =~ /\S/
      byte_size += size
    end
    prev_byte_size = byte_size
    while line.bytesize > (byte_pointer + byte_size)
      size = get_next_mbchar_size(line, byte_pointer + byte_size)
      mbchar = line.byteslice(byte_pointer + byte_size, size)
      break if mbchar =~ /\s/
      prev_byte_size = byte_size
      byte_size += size
    end
    prev_byte_size
  end
  def self.vi_big_backward_word(line, byte_pointer)
-    byte_size = 0
+    gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
-    while 0 < (byte_pointer - byte_size)
+    spaces = gcs.take_while { |c| space_character?(c) }
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
+    nonspaces = gcs.drop(spaces.size).take_while { |c| !space_character?(c) }
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
+    spaces.sum(&:bytesize) + nonspaces.sum(&:bytesize)
      break if mbchar =~ /\S/
      byte_size += size
    end
    while 0 < (byte_pointer - byte_size)
      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
      break if mbchar =~ /\s/
      byte_size += size
    end
    byte_size
  end
  def self.vi_forward_word(line, byte_pointer, drop_terminate_spaces = false)
-    if line.bytesize > byte_pointer
+    gcs = line.byteslice(byte_pointer..).grapheme_clusters
-      size = get_next_mbchar_size(line, byte_pointer)
+    return 0 if gcs.empty?
-      mbchar = line.byteslice(byte_pointer, size)
+
-      if mbchar =~ /\w/
+    c = gcs.first
-        started_by = :word
+    matched =
-      elsif mbchar =~ /\s/
+      if word_character?(c)
-        started_by = :space
+        gcs.take_while { |c| word_character?(c) }
      elsif space_character?(c)
        gcs.take_while { |c| space_character?(c) }
      else
-        started_by = :non_word_printable
+        gcs.take_while { |c| !word_character?(c) && !space_character?(c) }
      end
-      byte_size = size
+
-    else
+    return matched.sum(&:bytesize) if drop_terminate_spaces
-      return 0
+
-    end
+    spaces = gcs.drop(matched.size).take_while { |c| space_character?(c) }
-    while line.bytesize > (byte_pointer + byte_size)
+    matched.sum(&:bytesize) + spaces.sum(&:bytesize)
      size = get_next_mbchar_size(line, byte_pointer + byte_size)
      mbchar = line.byteslice(byte_pointer + byte_size, size)
      case started_by
      when :word
        break if mbchar =~ /\W/
      when :space
        break if mbchar =~ /\S/
      when :non_word_printable
        break if mbchar =~ /\w|\s/
      end
      byte_size += size
    end
    return byte_size if drop_terminate_spaces
    while line.bytesize > (byte_pointer + byte_size)
      size = get_next_mbchar_size(line, byte_pointer + byte_size)
      mbchar = line.byteslice(byte_pointer + byte_size, size)
      break if mbchar =~ /\S/
      byte_size += size
    end
    byte_size
  end
  def self.vi_forward_end_word(line, byte_pointer)
-    if (line.bytesize - 1) > byte_pointer
+    gcs = line.byteslice(byte_pointer..).grapheme_clusters
-      size = get_next_mbchar_size(line, byte_pointer)
+    return 0 if gcs.empty?
-      mbchar = line.byteslice(byte_pointer, size)
+    return gcs.first.bytesize if gcs.size == 1
-      if mbchar =~ /\w/
+
-        started_by = :word
+    start = gcs.shift
-      elsif mbchar =~ /\s/
+    skips = [start]
-        started_by = :space
+    if space_character?(start) || space_character?(gcs.first)
-      else
+      spaces = gcs.take_while { |c| space_character?(c) }
-        started_by = :non_word_printable
+      skips += spaces
-      end
+      gcs.shift(spaces.size)
      byte_size = size
    else
      return 0
    end
-    if (line.bytesize - 1) > (byte_pointer + byte_size)
+    start_with_word = word_character?(gcs.first)
-      size = get_next_mbchar_size(line, byte_pointer + byte_size)
+    matched = gcs.take_while { |c| start_with_word ? word_character?(c) : !word_character?(c) && !space_character?(c) }
-      mbchar = line.byteslice(byte_pointer + byte_size, size)
+    matched.pop
-      if mbchar =~ /\w/
+    skips.sum(&:bytesize) + matched.sum(&:bytesize)
        second = :word
      elsif mbchar =~ /\s/
        second = :space
      else
        second = :non_word_printable
      end
      second_byte_size = size
    else
      return byte_size
    end
    if second == :space
      byte_size += second_byte_size
      while (line.bytesize - 1) > (byte_pointer + byte_size)
        size = get_next_mbchar_size(line, byte_pointer + byte_size)
        mbchar = line.byteslice(byte_pointer + byte_size, size)
        if mbchar =~ /\S/
          if mbchar =~ /\w/
            started_by = :word
          else
            started_by = :non_word_printable
          end
          break
        end
        byte_size += size
      end
    else
      case [started_by, second]
      when [:word, :non_word_printable], [:non_word_printable, :word]
        started_by = second
      else
        byte_size += second_byte_size
        started_by = second
      end
    end
    prev_byte_size = byte_size
    while line.bytesize > (byte_pointer + byte_size)
      size = get_next_mbchar_size(line, byte_pointer + byte_size)
      mbchar = line.byteslice(byte_pointer + byte_size, size)
      case started_by
      when :word
        break if mbchar =~ /\W/
      when :non_word_printable
        break if mbchar =~ /[\w\s]/
      end
      prev_byte_size = byte_size
      byte_size += size
    end
    prev_byte_size
  end
  def self.vi_backward_word(line, byte_pointer)
-    byte_size = 0
+    gcs = line.byteslice(0, byte_pointer).grapheme_clusters.reverse
-    while 0 < (byte_pointer - byte_size)
+    spaces = gcs.take_while { |c| space_character?(c) }
-      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
+    gcs.shift(spaces.size)
-      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
+    start_with_word = word_character?(gcs.first)
-      if mbchar =~ /\S/
+    matched = gcs.take_while { |c| start_with_word ? word_character?(c) : !word_character?(c) && !space_character?(c) }
-        if mbchar =~ /\w/
+    spaces.sum(&:bytesize) + matched.sum(&:bytesize)
          started_by = :word
        else
          started_by = :non_word_printable
        end
        break
      end
      byte_size += size
    end
    while 0 < (byte_pointer - byte_size)
      size = get_prev_mbchar_size(line, byte_pointer - byte_size)
      mbchar = line.byteslice(byte_pointer - byte_size - size, size)
      case started_by
      when :word
        break if mbchar =~ /\W/
      when :non_word_printable
        break if mbchar =~ /[\w\s]/
      end
      byte_size += size
    end
    byte_size
  end
  def self.common_prefix(list, ignore_case: false)
@ -647,15 +398,18 @@ class Reline::Unicode
  end
  def self.vi_first_print(line)
-    byte_size = 0
+    gcs = line.grapheme_clusters
-    while (line.bytesize - 1) > byte_size
+    spaces = gcs.take_while { |c| space_character?(c) }
-      size = get_next_mbchar_size(line, byte_size)
+    spaces.sum(&:bytesize)
-      mbchar = line.byteslice(byte_size, size)
+  end
-      if mbchar =~ /\S/
+
-        break
+  def self.word_character?(s)
-      end
+    s.encode(Encoding::UTF_8).match?(/\p{Word}/) if s
-      byte_size += size
+  rescue Encoding::UndefinedConversionError
-    end
+    false
-    byte_size
+  end
  # Tells whether +s+ matches the whitespace pattern /\s/.
  #
  # s - a String (typically one grapheme cluster), or nil.
  #
  # Returns true or false for a String, and nil when +s+ is nil or false,
  # so callers can pass the result of Array#first on an empty list.
  def self.space_character?(s)
    return unless s

    s.match?(/\s/)
  end
 end
--- a/test/reline/test_unicode.rb
+++ b/test/reline/test_unicode.rb
@ -147,6 +147,7 @@ class Reline::Unicode::Test < Reline::TestCase
  def test_em_forward_word
    assert_equal(12, Reline::Unicode.em_forward_word('abc---fooあbar-baz', 3))
    assert_equal(11, Reline::Unicode.em_forward_word('abc---fooあbar-baz'.encode('sjis'), 3))
    assert_equal(3, Reline::Unicode.em_forward_word('abcfoo', 3))
    assert_equal(3, Reline::Unicode.em_forward_word('abc---', 3))
    assert_equal(0, Reline::Unicode.em_forward_word('abc', 3))
@ -154,6 +155,7 @@ class Reline::Unicode::Test < Reline::TestCase
  def test_em_forward_word_with_capitalization
    assert_equal([12, '---Fooあbar'], Reline::Unicode.em_forward_word_with_capitalization('abc---foOあBar-baz', 3))
    assert_equal([11, '---Fooあbar'.encode('sjis')], Reline::Unicode.em_forward_word_with_capitalization('abc---foOあBar-baz'.encode('sjis'), 3))
    assert_equal([3, 'Foo'], Reline::Unicode.em_forward_word_with_capitalization('abcfOo', 3))
    assert_equal([3, '---'], Reline::Unicode.em_forward_word_with_capitalization('abc---', 3))
    assert_equal([0, ''], Reline::Unicode.em_forward_word_with_capitalization('abc', 3))
@ -162,6 +164,7 @@ class Reline::Unicode::Test < Reline::TestCase
  def test_em_backward_word
    assert_equal(12, Reline::Unicode.em_backward_word('abc foo-barあbaz--- xyz', 20))
    assert_equal(11, Reline::Unicode.em_backward_word('abc foo-barあbaz--- xyz'.encode('sjis'), 19))
    assert_equal(2, Reline::Unicode.em_backward_word('  ', 2))
    assert_equal(2, Reline::Unicode.em_backward_word('ab', 2))
    assert_equal(0, Reline::Unicode.em_backward_word('ab', 0))
@ -169,6 +172,7 @@ class Reline::Unicode::Test < Reline::TestCase
  def test_em_big_backward_word
    assert_equal(16, Reline::Unicode.em_big_backward_word('abc foo-barあbaz--- xyz', 20))
    assert_equal(15, Reline::Unicode.em_big_backward_word('abc foo-barあbaz--- xyz'.encode('sjis'), 19))
    assert_equal(2, Reline::Unicode.em_big_backward_word('  ', 2))
    assert_equal(2, Reline::Unicode.em_big_backward_word('ab', 2))
    assert_equal(0, Reline::Unicode.em_big_backward_word('ab', 0))
@ -184,22 +188,22 @@ class Reline::Unicode::Test < Reline::TestCase
    assert_equal([3, 5, 6, 8], Reline::Unicode.ed_transpose_words('aa bb cc  ', 7))
    assert_equal([3, 5, 6, 10], Reline::Unicode.ed_transpose_words('aa bb cc  ', 8))
    assert_equal([3, 5, 6, 10], Reline::Unicode.ed_transpose_words('aa bb cc  ', 9))
-    word1 = 'fooあ'
+    ['sjis', 'utf-8'].each do |encoding|
-    word2 = 'barあbaz'
+      texts = ['fooあ', 'barあbaz', 'aaa  -', '- -', '-  bbb']
-    left = 'aaa  -'
+      word1, word2, left, middle, right = texts.map { |text| text.encode(encoding) }
-    middle = '- -'
+      expected = [left.bytesize, (left + word1).bytesize, (left + word1 + middle).bytesize, (left + word1 + middle + word2).bytesize]
-    right = '-  bbb'
+      assert_equal(expected, Reline::Unicode.ed_transpose_words(left + word1 + middle + word2 + right, left.bytesize + word1.bytesize))
-    expected = [left.bytesize, (left + word1).bytesize, (left + word1 + middle).bytesize, (left + word1 + middle + word2).bytesize]
+      assert_equal(expected, Reline::Unicode.ed_transpose_words(left + word1 + middle + word2 + right, left.bytesize + word1.bytesize + middle.bytesize))
-    assert_equal(expected, Reline::Unicode.ed_transpose_words(left + word1 + middle + word2 + right, left.bytesize + word1.bytesize))
+      assert_equal(expected, Reline::Unicode.ed_transpose_words(left + word1 + middle + word2 + right, left.bytesize + word1.bytesize + middle.bytesize + word2.bytesize - 1))
-    assert_equal(expected, Reline::Unicode.ed_transpose_words(left + word1 + middle + word2 + right, left.bytesize + word1.bytesize + middle.bytesize))
+    end
    assert_equal(expected, Reline::Unicode.ed_transpose_words(left + word1 + middle + word2 + right, left.bytesize + word1.bytesize + middle.bytesize + word2.bytesize - 1))
  end
  def test_vi_big_forward_word
    assert_equal(18, Reline::Unicode.vi_big_forward_word('abc---fooあbar-baz  xyz', 3))
    assert_equal(8, Reline::Unicode.vi_big_forward_word('abcfooあ  --', 3))
    assert_equal(7, Reline::Unicode.vi_big_forward_word('abcfooあ  --'.encode('sjis'), 3))
    assert_equal(6, Reline::Unicode.vi_big_forward_word('abcfooあ', 3))
-    assert_equal(2, Reline::Unicode.vi_big_forward_word('abc-  ', 3)) # maybe inconsistent
+    assert_equal(3, Reline::Unicode.vi_big_forward_word('abc-  ', 3))
    assert_equal(0, Reline::Unicode.vi_big_forward_word('abc', 3))
  end
@ -211,6 +215,7 @@ class Reline::Unicode::Test < Reline::TestCase
    assert_equal(1, Reline::Unicode.vi_big_forward_end_word('aa b', 0))
    assert_equal(3, Reline::Unicode.vi_big_forward_end_word('  aa b', 0))
    assert_equal(15, Reline::Unicode.vi_big_forward_end_word('abc---fooあbar-baz  xyz', 3))
    assert_equal(14, Reline::Unicode.vi_big_forward_end_word('abc---fooあbar-baz  xyz'.encode('sjis'), 3))
    assert_equal(3, Reline::Unicode.vi_big_forward_end_word('abcfooあ  --', 3))
    assert_equal(3, Reline::Unicode.vi_big_forward_end_word('abcfooあ', 3))
    assert_equal(2, Reline::Unicode.vi_big_forward_end_word('abc-  ', 3))
@ -219,6 +224,7 @@ class Reline::Unicode::Test < Reline::TestCase
  def test_vi_big_backward_word
    assert_equal(16, Reline::Unicode.vi_big_backward_word('abc foo-barあbaz--- xyz', 20))
    assert_equal(15, Reline::Unicode.vi_big_backward_word('abc foo-barあbaz--- xyz'.encode('sjis'), 19))
    assert_equal(2, Reline::Unicode.vi_big_backward_word('  ', 2))
    assert_equal(2, Reline::Unicode.vi_big_backward_word('ab', 2))
    assert_equal(0, Reline::Unicode.vi_big_backward_word('ab', 0))
@ -226,23 +232,28 @@ class Reline::Unicode::Test < Reline::TestCase
  def test_vi_forward_word
    assert_equal(3, Reline::Unicode.vi_forward_word('abc---fooあbar-baz', 3))
-    assert_equal(3, Reline::Unicode.vi_forward_word('abc---fooあbar-baz', 6)) # maybe bug
+    assert_equal(9, Reline::Unicode.vi_forward_word('abc---fooあbar-baz', 6))
-    assert_equal(3, Reline::Unicode.vi_forward_word('abcfooあ', 3)) # maybe bug
+    assert_equal(8, Reline::Unicode.vi_forward_word('abc---fooあbar-baz'.encode('sjis'), 6))
    assert_equal(6, Reline::Unicode.vi_forward_word('abcfooあ', 3))
    assert_equal(3, Reline::Unicode.vi_forward_word('abc---', 3))
    assert_equal(0, Reline::Unicode.vi_forward_word('abc', 3))
    assert_equal(2, Reline::Unicode.vi_forward_word('abc   def', 1, true))
    assert_equal(5, Reline::Unicode.vi_forward_word('abc   def', 1, false))
  end
  def test_vi_forward_end_word
    assert_equal(2, Reline::Unicode.vi_forward_end_word('abc---fooあbar-baz', 3))
-    assert_equal(2, Reline::Unicode.vi_forward_end_word('abc---fooあbar-baz', 6)) # maybe bug
+    assert_equal(8, Reline::Unicode.vi_forward_end_word('abc---fooあbar-baz', 6))
-    assert_equal(2, Reline::Unicode.vi_forward_end_word('abcfooあ', 3)) # maybe bug
+    assert_equal(7, Reline::Unicode.vi_forward_end_word('abc---fooあbar-baz'.encode('sjis'), 6))
    assert_equal(3, Reline::Unicode.vi_forward_end_word('abcfooあ', 3))
    assert_equal(2, Reline::Unicode.vi_forward_end_word('abc---', 3))
    assert_equal(0, Reline::Unicode.vi_forward_end_word('abc', 3))
  end
  def test_vi_backward_word
    assert_equal(3, Reline::Unicode.vi_backward_word('abc foo-barあbaz--- xyz', 20))
-    assert_equal(3, Reline::Unicode.vi_backward_word('abc foo-barあbaz--- xyz', 17)) # maybe bug
+    assert_equal(9, Reline::Unicode.vi_backward_word('abc foo-barあbaz--- xyz', 17))
    assert_equal(8, Reline::Unicode.vi_backward_word('abc foo-barあbaz--- xyz'.encode('sjis'), 16))
    assert_equal(2, Reline::Unicode.vi_backward_word('  ', 2))
    assert_equal(2, Reline::Unicode.vi_backward_word('ab', 2))
    assert_equal(0, Reline::Unicode.vi_backward_word('ab', 0))
@ -250,8 +261,26 @@ class Reline::Unicode::Test < Reline::TestCase
  def test_vi_first_print
    assert_equal(3, Reline::Unicode.vi_first_print('   abcdefg'))
-    assert_equal(2, Reline::Unicode.vi_first_print('   ')) # maybe inconsistent
+    assert_equal(3, Reline::Unicode.vi_first_print('   '))
    assert_equal(0, Reline::Unicode.vi_first_print('abc'))
    assert_equal(0, Reline::Unicode.vi_first_print('あ'))
    assert_equal(0, Reline::Unicode.vi_first_print('あ'.encode('sjis')))
    assert_equal(0, Reline::Unicode.vi_first_print(''))
  end
  # Exercises Reline::Unicode.word_character? and .space_character? with
  # ASCII, UTF-8 and Shift_JIS input, plus nil. The Shift_JIS character built
  # from code 33345 is expected to be neither a word nor a space character
  # (presumably it covers the UndefinedConversionError rescue in
  # word_character? -- confirm against the implementation).
  def test_character_type
    sjis_kana = 'あ'.encode('sjis')
    sjis_other = 33345.chr('sjis')

    # Word characters: ASCII letter and kana in both encodings.
    ['a', 'あ', sjis_kana].each do |char|
      assert(Reline::Unicode.word_character?(char))
    end
    [sjis_other, '-', nil].each do |char|
      refute(Reline::Unicode.word_character?(char))
    end

    # Space characters: only the plain space qualifies here.
    assert(Reline::Unicode.space_character?(' '))
    ['あ', sjis_kana, sjis_other, '-', nil].each do |char|
      refute(Reline::Unicode.space_character?(char))
    end
  end
 end