Import CSV 3.1.2 (#2547)

Sutou Kouhei 2019-10-12 14:03:21 +09:00 committed by GitHub
parent d6e68bb263
commit 92df7d98b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
Notes: git 2019-10-12 14:03:45 +09:00
Merged-By: kou <kou@clear-code.com>
14 changed files with 582 additions and 447 deletions

NEWS
View File

@@ -343,7 +343,7 @@ CGI::
 CSV::
-  * Upgrade to 3.0.9.
+  * Upgrade to 3.1.2.
    See https://github.com/ruby/csv/blob/master/NEWS.md.
 Date::

File diff suppressed because it is too large

View File

@@ -1,8 +1,14 @@
 # frozen_string_literal: true
 class CSV
+  # Note: Don't use this class directly. This is an internal class.
   class FieldsConverter
     include Enumerable
+    #
+    # A CSV::FieldsConverter is a data structure for storing the
+    # fields converter properties to be passed as a parameter
+    # when parsing a new file (e.g. CSV::Parser.new(@io, parser_options))
+    #
     def initialize(options={})
       @converters = []

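For context, FieldsConverter is the internal structure behind the public +converters+ option: each registered converter is applied to every parsed field. A minimal sketch of that public entry point (illustrative, not part of this diff):

    require "csv"
    # :numeric is a built-in converter; custom lambdas can be registered too.
    CSV.parse("1,2.5,foo\n", converters: :numeric)
    # => [[1, 2.5, "foo"]]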
View File

@@ -11,10 +11,31 @@ using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
 using CSV::MatchP if CSV.const_defined?(:MatchP)
 class CSV
+  # Note: Don't use this class directly. This is an internal class.
   class Parser
+    #
+    # A CSV::Parser is m17n aware. The parser works in the Encoding of the IO
+    # or String object being read from or written to. Your data is never transcoded
+    # (unless you ask Ruby to transcode it for you) and will literally be parsed in
+    # the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the
+    # Encoding of your data. This is accomplished by transcoding the parser itself
+    # into your Encoding.
+    #
+    # Raised when encoding is invalid.
     class InvalidEncoding < StandardError
     end
+
+    #
+    # CSV::Scanner receives a CSV output, scans it and return the content.
+    # It also controls the life cycle of the object with its methods +keep_start+,
+    # +keep_end+, +keep_back+, +keep_drop+.
+    #
+    # Uses StringScanner (the official strscan gem). Strscan provides lexical
+    # scanning operations on a String. We inherit its object and take advantage
+    # on the methods. For more information, please visit:
+    # https://ruby-doc.org/stdlib-2.6.1/libdoc/strscan/rdoc/StringScanner.html
+    #
     class Scanner < StringScanner
       alias_method :scan_all, :scan
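A small illustration of the m17n behaviour described in the new comment, assuming UTF-8 source data; parsed strings keep the data's encoding rather than being transcoded (usage sketch, not part of the diff):

    require "csv"
    data = "名前,年齢\n田中,29\n".encode("EUC-JP")
    rows = CSV.parse(data)
    rows[0][0].encoding   # => #<Encoding:EUC-JP>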
@@ -38,7 +59,7 @@ class CSV
       def keep_end
         start = @keeps.pop
-        string[start, pos - start]
+        string.byteslice(start, pos - start)
       end
       def keep_back
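The switch to byteslice matters because StringScanner#pos is a byte offset, while String#[] indexes by character; the two only agree for ASCII-only data. A minimal sketch of the difference (illustrative, not from the library):

    s = "あいう"         # 3 characters, 9 bytes in UTF-8
    s[0, 3]              # => "あいう"  (character indexing)
    s.byteslice(0, 3)    # => "あ"      (byte indexing, what a byte-based pos expects)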
@@ -50,6 +71,18 @@ class CSV
       end
     end
+    #
+    # CSV::InputsScanner receives IO inputs, encoding and the chunk_size.
+    # It also controls the life cycle of the object with its methods +keep_start+,
+    # +keep_end+, +keep_back+, +keep_drop+.
+    #
+    # CSV::InputsScanner.scan() tries to match with pattern at the current position.
+    # If there's a match, the scanner advances the “scan pointer” and returns the matched string.
+    # Otherwise, the scanner returns nil.
+    #
+    # CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer).
+    # If there is no more data (eos? = true), it returns "".
+    #
     class InputsScanner
       def initialize(inputs, encoding, chunk_size: 8192)
         @inputs = inputs.dup
@@ -137,7 +170,7 @@ class CSV
       def keep_end
         start, buffer = @keeps.pop
-        keep = @scanner.string[start, @scanner.pos - start]
+        keep = @scanner.string.byteslice(start, @scanner.pos - start)
         if buffer
           buffer << keep
           keep = buffer
@@ -192,7 +225,7 @@ class CSV
         input = @inputs.first
         case input
         when StringIO
-          string = input.string
+          string = input.read
           raise InvalidEncoding unless string.valid_encoding?
           @scanner = StringScanner.new(string)
           @inputs.shift
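The change from StringIO#string to StringIO#read is what makes a pre-seeked StringIO (for example, one whose BOM has already been consumed) parse correctly: #string always returns the whole underlying buffer, while #read starts at the current position. An illustrative sketch:

    require "stringio"
    io = StringIO.new("\ufeffa,b\n")
    io.read(3)   # consume the 3-byte UTF-8 BOM
    io.string    # => "\uFEFFa,b\n"  (whole buffer, position ignored)
    io.read      # => "a,b\n"        (from the current position)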
@@ -319,6 +352,7 @@ class CSV
     end
     private
+    # A set of tasks to prepare the file in order to parse it
     def prepare
       prepare_variable
       prepare_quote_character
@@ -447,7 +481,13 @@ class CSV
     end
     def prepare_separators
-      @column_separator = @options[:column_separator].to_s.encode(@encoding)
+      column_separator = @options[:column_separator]
+      @column_separator = column_separator.to_s.encode(@encoding)
+      if @column_separator.size < 1
+        message = ":col_sep must be 1 or more characters: "
+        message += column_separator.inspect
+        raise ArgumentError, message
+      end
       @row_separator =
         resolve_row_separator(@options[:row_separator]).encode(@encoding)
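With this validation in place, an unusable separator fails fast instead of misparsing or hanging; for example (matching the new tests further down):

    CSV.parse("a,b\n", col_sep: "")
    # => ArgumentError (:col_sep must be 1 or more characters: "")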
@@ -534,7 +574,9 @@ class CSV
       cr = "\r".encode(@encoding)
       lf = "\n".encode(@encoding)
       if @input.is_a?(StringIO)
-        separator = detect_row_separator(@input.string, cr, lf)
+        pos = @input.pos
+        separator = detect_row_separator(@input.read, cr, lf)
+        @input.seek(pos)
       elsif @input.respond_to?(:gets)
         if @input.is_a?(File)
           chunk_size = 32 * 1024
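Row-separator auto-detection has to read ahead to choose between "\r\n", "\n", and "\r", so the position is saved and restored to keep that peek from consuming the caller's input. The pattern in isolation (illustrative):

    pos = io.pos
    sample = io.read   # peek at the remaining data
    io.seek(pos)       # put the stream back where the caller left it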
@@ -651,7 +693,9 @@ class CSV
       return false if @quote_character.nil?
       if @input.is_a?(StringIO)
-        sample = @input.string
+        pos = @input.pos
+        sample = @input.read
+        @input.seek(pos)
       else
         return false if @samples.empty?
         sample = @samples.first
@@ -684,7 +728,7 @@ class CSV
         UnoptimizedStringIO.new(sample)
       end
       if @input.is_a?(StringIO)
-        inputs << UnoptimizedStringIO.new(@input.string)
+        inputs << UnoptimizedStringIO.new(@input.read)
       else
         inputs << @input
       end
@@ -697,7 +741,7 @@ class CSV
     def build_scanner
       string = nil
       if @samples.empty? and @input.is_a?(StringIO)
-        string = @input.read
+        string = @input.read
       elsif @samples.size == 1 and @input.respond_to?(:eof?) and @input.eof?
         string = @samples[0]
       end

View File

@@ -4,7 +4,7 @@ require "forwardable"
 class CSV
   #
   # A CSV::Row is part Array and part Hash. It retains an order for the fields
   # and allows duplicates just as an Array would, but also allows you to access
   # fields by name just as you could if they were in a Hash.
   #
@@ -13,13 +13,13 @@ class CSV
   #
   class Row
     #
-    # Construct a new CSV::Row from +headers+ and +fields+, which are expected
+    # Constructs a new CSV::Row from +headers+ and +fields+, which are expected
     # to be Arrays. If one Array is shorter than the other, it will be padded
     # with +nil+ objects.
     #
     # The optional +header_row+ parameter can be set to +true+ to indicate, via
     # CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header
-    # row. Otherwise, the row is assumes to be a field row.
+    # row. Otherwise, the row assumes to be a field row.
     #
     # A CSV::Row object supports the following Array methods through delegation:
     #
@@ -74,11 +74,11 @@ class CSV
     #   field( header, offset )
     #   field( index )
     #
     # This method will return the field value by +header+ or +index+. If a field
     # is not found, +nil+ is returned.
     #
     # When provided, +offset+ ensures that a header match occurs on or later
     # than the +offset+ index. You can use this to find duplicate headers,
     # without resorting to hard-coding exact indices.
     #
     def field(header_or_index, minimum_index = 0)
@@ -142,7 +142,7 @@ class CSV
     # assigns the +value+.
     #
     # Assigning past the end of the row with an index will set all pairs between
     # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
     # pair.
     #
     def []=(*args)
@@ -172,8 +172,8 @@ class CSV
     #   <<( header_and_field_hash )
     #
     # If a two-element Array is provided, it is assumed to be a header and field
     # and the pair is appended. A Hash works the same way with the key being
     # the header and the value being the field. Anything else is assumed to be
     # a lone field which is appended with a +nil+ header.
     #
     # This method returns the row for chaining.
@@ -191,7 +191,7 @@ class CSV
     end
     #
     # A shortcut for appending multiple fields. Equivalent to:
     #
     #   args.each { |arg| csv_row << arg }
     #
@@ -209,8 +209,8 @@ class CSV
     #   delete( header, offset )
     #   delete( index )
     #
-    # Used to remove a pair from the row by +header+ or +index+. The pair is
+    # Removes a pair from the row by +header+ or +index+. The pair is
     # located as described in CSV::Row.field(). The deleted pair is returned,
     # or +nil+ if a pair could not be found.
     #
     def delete(header_or_index, minimum_index = 0)
@@ -325,7 +325,7 @@ class CSV
     end
     #
     # Collapses the row into a simple Hash. Be warned that this discards field
     # order and clobbers duplicate fields.
     #
     def to_h
@@ -340,7 +340,7 @@ class CSV
     alias_method :to_ary, :to_a
     #
     # Returns the row as a CSV String. Headers are not used. Equivalent to:
     #
     #   csv_row.fields.to_csv( options )
     #
@@ -367,7 +367,9 @@ class CSV
       end
     end
+    #
     # A summary of fields, by header, in an ASCII compatible String.
+    #
     def inspect
       str = ["#<", self.class.to_s]
       each do |header, field|

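Taken together, the CSV::Row API documented above behaves roughly like this (usage sketch, not part of the diff):

    require "csv"
    row = CSV::Row.new(["name", "name"], ["Alice", "Bob"])
    row.field("name")       # => "Alice"
    row.field("name", 1)    # => "Bob"   (offset skips the first match)
    row << ["email", "alice@example.com"]   # append a header/field pair
    row.fields              # => ["Alice", "Bob", "alice@example.com"]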
View File

@@ -5,7 +5,7 @@ require "forwardable"
 class CSV
   #
   # A CSV::Table is a two-dimensional data structure for representing CSV
   # documents. Tables allow you to work with the data by row or column,
   # manipulate the data, and even convert the results back to CSV, if needed.
   #
   # All tables returned by CSV will be constructed from this class, if header
@@ -13,8 +13,8 @@
   #
   class Table
     #
-    # Construct a new CSV::Table from +array_of_rows+, which are expected
+    # Constructs a new CSV::Table from +array_of_rows+, which are expected
     # to be CSV::Row objects. All rows are assumed to have the same headers.
     #
     # The optional +headers+ parameter can be set to Array of headers.
     # If headers aren't set, headers are fetched from CSV::Row objects.
@@ -55,11 +55,11 @@
     def_delegators :@table, :empty?, :length, :size
     #
     # Returns a duplicate table object, in column mode. This is handy for
     # chaining in a single call without changing the table mode, but be aware
     # that this method can consume a fair amount of memory for bigger data sets.
     #
     # This method returns the duplicate table for chaining. Don't chain
     # destructive methods (like []=()) this way though, since you are working
     # with a duplicate.
     #
@@ -68,7 +68,7 @@
     end
     #
     # Switches the mode of this table to column mode. All calls to indexing and
     # iteration methods will work with columns until the mode is changed again.
     #
     # This method returns the table and is safe to chain.
@@ -80,7 +80,7 @@
     end
     #
     # Returns a duplicate table object, in mixed mode. This is handy for
     # chaining in a single call without changing the table mode, but be aware
     # that this method can consume a fair amount of memory for bigger data sets.
     #
@@ -93,9 +93,9 @@
     end
     #
     # Switches the mode of this table to mixed mode. All calls to indexing and
     # iteration methods will use the default intelligent indexing system until
     # the mode is changed again. In mixed mode an index is assumed to be a row
     # reference while anything else is assumed to be column access by headers.
     #
     # This method returns the table and is safe to chain.
@@ -120,7 +120,7 @@
     end
     #
     # Switches the mode of this table to row mode. All calls to indexing and
     # iteration methods will work with rows until the mode is changed again.
     #
     # This method returns the table and is safe to chain.
@@ -146,7 +146,7 @@
     #
     # In the default mixed mode, this method returns rows for index access and
     # columns for header access. You can force the index association by first
     # calling by_col!() or by_row!().
     #
     # Columns are returned as an Array of values. Altering that Array has no
@@ -163,18 +163,18 @@
     #
     # In the default mixed mode, this method assigns rows for index access and
     # columns for header access. You can force the index association by first
     # calling by_col!() or by_row!().
     #
     # Rows may be set to an Array of values (which will inherit the table's
     # headers()) or a CSV::Row.
     #
     # Columns may be set to a single value, which is copied to each row of the
     # column, or an Array of values. Arrays of values are assigned to rows top
     # to bottom in row major order. Excess values are ignored and if the Array
     # does not have a value for each row the extra rows will receive a +nil+.
     #
     # Assigning to an existing column or row clobbers the data. Assigning to
     # new columns creates them at the right end of the table.
     #
     def []=(index_or_header, value)
@@ -212,9 +212,9 @@
     #
     # The mixed mode default is to treat a list of indices as row access,
     # returning the rows indicated. Anything else is considered columnar
     # access. For columnar access, the return set has an Array for each row
     # with the values indicated by the headers in each Array. You can force
     # column or row mode using by_col!() or by_row!().
     #
     # You cannot mix column and row access.
@@ -234,7 +234,7 @@
     end
     #
     # Adds a new row to the bottom end of this table. You can provide an Array,
     # which will be converted to a CSV::Row (inheriting the table's headers()),
     # or a CSV::Row.
     #
@@ -251,7 +251,7 @@
     end
     #
     # A shortcut for appending multiple rows. Equivalent to:
     #
     #   rows.each { |row| self << row }
     #
@@ -264,9 +264,9 @@
     end
     #
     # Removes and returns the indicated columns or rows. In the default mixed
     # mode indices refer to rows and everything else is assumed to be a column
     # headers. Use by_col!() or by_row!() to force the lookup.
     #
     def delete(*indexes_or_headers)
       if indexes_or_headers.empty?
@@ -293,9 +293,9 @@
     end
     #
     # Removes any column or row for which the block returns +true+. In the
     # default mixed mode or row mode, iteration is the standard row major
     # walking of rows. In column mode, iteration will +yield+ two element
     # tuples containing the column name and an Array of values for that column.
     #
     # This method returns the table for chaining.
@@ -321,7 +321,7 @@
     #
     # In the default mixed mode or row mode, iteration is the standard row major
     # walking of rows. In column mode, iteration will +yield+ two element
     # tuples containing the column name and an Array of values for that column.
     #
     # This method returns the table for chaining.
@@ -347,7 +347,7 @@
     end
     #
     # Returns the table as an Array of Arrays. Headers will be the first row,
     # then all of the field rows will follow.
     #
     def to_a
@@ -360,7 +360,7 @@
     end
     #
     # Returns the table as a complete CSV String. Headers will be listed first,
     # then all of the field rows.
     #
     # This method assumes you want the Table.headers(), unless you explicitly

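A usage sketch of the table modes described above (illustrative, not part of the diff):

    require "csv"
    table = CSV.parse("name,age\nAlice,30\nBob,25\n", headers: true)
    table.by_row[0]        # => #<CSV::Row "name":"Alice" "age":"30">
    table.by_col["name"]   # => ["Alice", "Bob"]
    table[0]               # mixed mode: an Integer index selects a row
    table["age"]           # mixed mode: anything else selects a column => ["30", "25"]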
View File

@@ -2,5 +2,5 @@
 class CSV
   # The version of the installed library.
-  VERSION = "3.1.1"
+  VERSION = "3.1.2"
 end

View File

@@ -6,7 +6,12 @@ require_relative "row"
 using CSV::MatchP if CSV.const_defined?(:MatchP)
 class CSV
+  # Note: Don't use this class directly. This is an internal class.
   class Writer
+    #
+    # A CSV::Writer receives an output, prepares the header, format and output.
+    # It allows us to write new rows in the object and rewind it.
+    #
     attr_reader :lineno
     attr_reader :headers
@@ -22,6 +27,9 @@ class CSV
       @fields_converter = @options[:fields_converter]
     end
+    #
+    # Adds a new row
+    #
     def <<(row)
       case row
       when Row
@@ -47,6 +55,9 @@ class CSV
       self
     end
+    #
+    # Winds back to the beginning
+    #
     def rewind
       @lineno = 0
       @headers = nil if @options[:headers].nil?

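CSV::Writer is internal, but its +<<+ and +rewind+ back the public writing interface; a sketch of that public usage (illustrative, not part of the diff):

    require "csv"
    require "stringio"
    out = StringIO.new
    csv = CSV.new(out, headers: ["a", "b"], write_headers: true)
    csv << [1, 2]                            # routed through CSV::Writer#<<
    csv << CSV::Row.new(["a", "b"], [3, 4])
    out.string   # => "a,b\n1,2\n3,4\n"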
View File

@@ -233,11 +233,21 @@ line,5,jkl
     assert_equal([["a"]], CSV.parse("a\r\n"))
   end
+  def test_seeked_string_io
+    input_with_bom = StringIO.new("\ufeffあ,い,う\r\na,b,c\r\n")
+    input_with_bom.read(3)
+    assert_equal([
+                   ["あ", "い", "う"],
+                   ["a", "b", "c"],
+                 ],
+                 CSV.new(input_with_bom).each.to_a)
+  end
+
   private
-  def assert_parse_errors_out(*args, **options)
+  def assert_parse_errors_out(data, **options)
     assert_raise(CSV::MalformedCSVError) do
       Timeout.timeout(0.2) do
-        CSV.parse(*args, **options)
+        CSV.parse(data, **options)
         fail("Parse didn't error out")
       end
     end

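The new test's read(3) works because the UTF-8 byte order mark is exactly three bytes, so reading it positions the StringIO at the first real field (illustrative):

    "\ufeff".bytesize   # => 3 (EF BB BF)
    io = StringIO.new("\ufeffあ,い,う\r\n")
    io.read(3)          # skip the BOM; CSV.new(io) now starts at "あ"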
View File

@@ -312,12 +312,12 @@ A
   end
   def test_parse_empty
-    assert_equal(CSV::Table.new([], **{}),
+    assert_equal(CSV::Table.new([]),
                  CSV.parse("", headers: true))
   end
   def test_parse_empty_line
-    assert_equal(CSV::Table.new([], **{}),
+    assert_equal(CSV::Table.new([]),
                  CSV.parse("\n", headers: true))
   end

View File

@@ -6,7 +6,7 @@ require_relative "../helper"
 class TestCSVParseRewind < Test::Unit::TestCase
   extend DifferentOFS
-  def parse(data, options={})
+  def parse(data, **options)
     csv = CSV.new(data, **options)
     records = csv.to_a
     csv.rewind

View File

@@ -268,11 +268,11 @@ class TestCSVEncodings < Test::Unit::TestCase
   private
-  def assert_parses(fields, encoding, options = { })
+  def assert_parses(fields, encoding, **options)
     encoding = Encoding.find(encoding) unless encoding.is_a? Encoding
     orig_fields = fields
     fields = encode_ary(fields, encoding)
-    data = ary_to_data(fields, options)
+    data = ary_to_data(fields, **options)
     parsed = CSV.parse(data, **options)
     assert_equal(fields, parsed)
     parsed.flatten.each_with_index do |field, i|
@@ -285,7 +285,9 @@ class TestCSVEncodings < Test::Unit::TestCase
       end
     end
     begin
-      CSV.open(@temp_csv_path, "rb:#{encoding}:#{__ENCODING__}", **options) do |csv|
+      CSV.open(@temp_csv_path,
+               "rb:#{encoding}:#{__ENCODING__}",
+               **options) do |csv|
         csv.each_with_index do |row, i|
           assert_equal(orig_fields[i], row)
         end
@@ -315,7 +317,7 @@ class TestCSVEncodings < Test::Unit::TestCase
     ary.map { |row| row.map { |field| field.encode(encoding) } }
   end
-  def ary_to_data(ary, options = { })
+  def ary_to_data(ary, **options)
     encoding = ary.flatten.first.encoding
     quote_char = (options[:quote_char] || '"').encode(encoding)
     col_sep = (options[:col_sep] || ",").encode(encoding)
@@ -327,9 +329,9 @@ class TestCSVEncodings < Test::Unit::TestCase
     }.join('').encode(encoding)
   end
-  def encode_for_tests(data, options = { })
-    yield ary_to_data(encode_ary(data, "UTF-8"), options)
-    yield ary_to_data(encode_ary(data, "UTF-16BE"), options)
+  def encode_for_tests(data, **options)
+    yield ary_to_data(encode_ary(data, "UTF-8"), **options)
+    yield ary_to_data(encode_ary(data, "UTF-16BE"), **options)
   end
   def each_encoding

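The test-helper signature changes above (options = { } becoming **options) line up with Ruby 2.7's separation of positional and keyword arguments: forwarding an options Hash positionally and then splatting it into CSV.parse triggers deprecation warnings, whereas a keyword-rest parameter forwards cleanly. A minimal sketch of the new style:

    require "csv"
    def parse(data, **options)      # keyword rest instead of a positional Hash
      CSV.parse(data, **options)    # forwards keywords without a conversion warning
    end
    parse("a;b\n", col_sep: ";")    # => [["a", "b"]]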
View File

@@ -52,6 +52,20 @@ line,4,jkl
     assert_equal([",,,", nil], CSV.parse_line(",,,;", col_sep: ";"))
   end
+
+  def test_col_sep_nil
+    assert_raise_with_message(ArgumentError,
+                              ":col_sep must be 1 or more characters: nil") do
+      CSV.parse(@sample_data, col_sep: nil)
+    end
+  end
+
+  def test_col_sep_empty
+    assert_raise_with_message(ArgumentError,
+                              ":col_sep must be 1 or more characters: \"\"") do
+      CSV.parse(@sample_data, col_sep: "")
+    end
+  end
   def test_row_sep
     error = assert_raise(CSV::MalformedCSVError) do
       CSV.parse_line("1,2,3\n,4,5\r\n", row_sep: "\r\n")
@@ -110,10 +124,10 @@ line,4,jkl
   def test_line
     lines = [
-      %Q(abc,def\n),
-      %Q(abc,"d\nef"\n),
-      %Q(abc,"d\r\nef"\n),
-      %Q(abc,"d\ref")
+      %Q(\u{3000}abc,def\n),
+      %Q(\u{3000}abc,"d\nef"\n),
+      %Q(\u{3000}abc,"d\r\nef"\n),
+      %Q(\u{3000}abc,"d\ref")
     ]
     csv = CSV.new(lines.join(''))
     lines.each do |line|

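Prefixing the test lines with a full-width space (\u{3000}) exercises the byteslice fixes above: the raw-line tracking uses byte offsets, and with multibyte data a character-indexed slice over byte positions would return garbage. Roughly, assuming CSV#line returns the raw text of the most recently read row as this test uses it:

    csv = CSV.new("\u{3000}abc,def\n\u{3000}abc,\"d\nef\"\n")
    csv.shift
    csv.line   # => "\u{3000}abc,def\n"  (the raw line just read)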
View File

@@ -205,6 +205,32 @@ module TestCSVWriteGeneral
     assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}].encode("EUC-JP"),
                  generate_line(row))
   end
+
+  def test_encoding_with_default_internal
+    with_default_internal(Encoding::UTF_8) do
+      row = ["あ", "い", "う"].collect {|field| field.encode("EUC-JP")}
+      assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}].encode("EUC-JP"),
+                   generate_line(row, encoding: Encoding::EUC_JP))
+    end
+  end
+
+  def test_with_default_internal
+    with_default_internal(Encoding::UTF_8) do
+      row = ["あ", "い", "う"].collect {|field| field.encode("EUC-JP")}
+      assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}].encode("EUC-JP"),
+                   generate_line(row))
+    end
+  end
+
+  def with_default_internal(encoding)
+    original = Encoding.default_internal
+    begin
+      Encoding.default_internal = encoding
+      yield
+    ensure
+      Encoding.default_internal = original
+    end
+  end
 end
 class TestCSVWriteGeneralGenerateLine < Test::Unit::TestCase