Import CSV 3.1.2 (#2547)

Sutou Kouhei 2019-10-12 14:03:21 +09:00 committed by GitHub
parent d6e68bb263
commit 92df7d98b6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
Notes: git 2019-10-12 14:03:45 +09:00
Merged-By: kou <kou@clear-code.com>
14 changed files with 582 additions and 447 deletions

NEWS
View File

@@ -343,7 +343,7 @@ CGI::
 CSV::
-  * Upgrade to 3.0.9.
+  * Upgrade to 3.1.2.
    See https://github.com/ruby/csv/blob/master/NEWS.md.
 Date::

File diff suppressed because it is too large

View File

@@ -1,8 +1,14 @@
 # frozen_string_literal: true
 class CSV
+  # Note: Don't use this class directly. This is an internal class.
   class FieldsConverter
     include Enumerable
+    #
+    # A CSV::FieldsConverter is a data structure for storing the
+    # fields converter properties to be passed as a parameter
+    # when parsing a new file (e.g. CSV::Parser.new(@io, parser_options))
+    #
     def initialize(options={})
       @converters = []

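For context, FieldsConverter is the internal structure behind the public +converters+ option: each registered converter is applied to every parsed field. A minimal sketch of that public entry point (illustrative, not part of this diff):

    require "csv"
    # :numeric is a built-in converter; custom lambdas can be registered too.
    CSV.parse("1,2.5,foo\n", converters: :numeric)
    # => [[1, 2.5, "foo"]]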
View File

@@ -11,10 +11,31 @@ using CSV::DeleteSuffix if CSV.const_defined?(:DeleteSuffix)
 using CSV::MatchP if CSV.const_defined?(:MatchP)
 class CSV
+  # Note: Don't use this class directly. This is an internal class.
   class Parser
+    #
+    # A CSV::Parser is m17n aware. The parser works in the Encoding of the IO
+    # or String object being read from or written to. Your data is never transcoded
+    # (unless you ask Ruby to transcode it for you) and will literally be parsed in
+    # the Encoding it is in. Thus CSV will return Arrays or Rows of Strings in the
+    # Encoding of your data. This is accomplished by transcoding the parser itself
+    # into your Encoding.
+    #
+    # Raised when encoding is invalid.
     class InvalidEncoding < StandardError
     end
+
+    #
+    # CSV::Scanner receives a CSV output, scans it and return the content.
+    # It also controls the life cycle of the object with its methods +keep_start+,
+    # +keep_end+, +keep_back+, +keep_drop+.
+    #
+    # Uses StringScanner (the official strscan gem). Strscan provides lexical
+    # scanning operations on a String. We inherit its object and take advantage
+    # on the methods. For more information, please visit:
+    # https://ruby-doc.org/stdlib-2.6.1/libdoc/strscan/rdoc/StringScanner.html
+    #
     class Scanner < StringScanner
       alias_method :scan_all, :scan
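A small illustration of the m17n behaviour described in the new comment, assuming UTF-8 source data; parsed strings keep the data's encoding rather than being transcoded (usage sketch, not part of the diff):

    require "csv"
    data = "名前,年齢\n田中,29\n".encode("EUC-JP")
    rows = CSV.parse(data)
    rows[0][0].encoding   # => #<Encoding:EUC-JP>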
@@ -38,7 +59,7 @@ class CSV
       def keep_end
         start = @keeps.pop
-        string[start, pos - start]
+        string.byteslice(start, pos - start)
       end
       def keep_back
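The switch to byteslice matters because StringScanner#pos is a byte offset, while String#[] indexes by character; the two only agree for ASCII-only data. A minimal sketch of the difference (illustrative, not from the library):

    s = "あいう"         # 3 characters, 9 bytes in UTF-8
    s[0, 3]              # => "あいう"  (character indexing)
    s.byteslice(0, 3)    # => "あ"      (byte indexing, what a byte-based pos expects)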
@@ -50,6 +71,18 @@ class CSV
       end
     end
+    #
+    # CSV::InputsScanner receives IO inputs, encoding and the chunk_size.
+    # It also controls the life cycle of the object with its methods +keep_start+,
+    # +keep_end+, +keep_back+, +keep_drop+.
+    #
+    # CSV::InputsScanner.scan() tries to match with pattern at the current position.
+    # If there's a match, the scanner advances the “scan pointer” and returns the matched string.
+    # Otherwise, the scanner returns nil.
+    #
+    # CSV::InputsScanner.rest() returns the “rest” of the string (i.e. everything after the scan pointer).
+    # If there is no more data (eos? = true), it returns "".
+    #
     class InputsScanner
       def initialize(inputs, encoding, chunk_size: 8192)
         @inputs = inputs.dup
@@ -137,7 +170,7 @@ class CSV
       def keep_end
         start, buffer = @keeps.pop
-        keep = @scanner.string[start, @scanner.pos - start]
+        keep = @scanner.string.byteslice(start, @scanner.pos - start)
         if buffer
           buffer << keep
           keep = buffer
@@ -192,7 +225,7 @@ class CSV
         input = @inputs.first
         case input
         when StringIO
-          string = input.string
+          string = input.read
           raise InvalidEncoding unless string.valid_encoding?
           @scanner = StringScanner.new(string)
           @inputs.shift
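The change from StringIO#string to StringIO#read is what makes a pre-seeked StringIO (for example, one whose BOM has already been consumed) parse correctly: #string always returns the whole underlying buffer, while #read starts at the current position. An illustrative sketch:

    require "stringio"
    io = StringIO.new("\ufeffa,b\n")
    io.read(3)   # consume the 3-byte UTF-8 BOM
    io.string    # => "\uFEFFa,b\n"  (whole buffer, position ignored)
    io.read      # => "a,b\n"        (from the current position)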
@@ -319,6 +352,7 @@ class CSV
     end
     private
+    # A set of tasks to prepare the file in order to parse it
     def prepare
       prepare_variable
       prepare_quote_character
@@ -447,7 +481,13 @@ class CSV
     end
     def prepare_separators
-      @column_separator = @options[:column_separator].to_s.encode(@encoding)
+      column_separator = @options[:column_separator]
+      @column_separator = column_separator.to_s.encode(@encoding)
+      if @column_separator.size < 1
+        message = ":col_sep must be 1 or more characters: "
+        message += column_separator.inspect
+        raise ArgumentError, message
+      end
       @row_separator =
         resolve_row_separator(@options[:row_separator]).encode(@encoding)
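With this validation in place, an unusable separator fails fast instead of misparsing or hanging; for example (matching the new tests further down):

    CSV.parse("a,b\n", col_sep: "")
    # => ArgumentError (:col_sep must be 1 or more characters: "")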
@@ -534,7 +574,9 @@ class CSV
       cr = "\r".encode(@encoding)
       lf = "\n".encode(@encoding)
       if @input.is_a?(StringIO)
-        separator = detect_row_separator(@input.string, cr, lf)
+        pos = @input.pos
+        separator = detect_row_separator(@input.read, cr, lf)
+        @input.seek(pos)
       elsif @input.respond_to?(:gets)
         if @input.is_a?(File)
           chunk_size = 32 * 1024
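Row-separator auto-detection has to read ahead to choose between "\r\n", "\n", and "\r", so the position is saved and restored to keep that peek from consuming the caller's input. The pattern in isolation (illustrative):

    pos = io.pos
    sample = io.read   # peek at the remaining data
    io.seek(pos)       # put the stream back where the caller left it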
@@ -651,7 +693,9 @@ class CSV
       return false if @quote_character.nil?
       if @input.is_a?(StringIO)
-        sample = @input.string
+        pos = @input.pos
+        sample = @input.read
+        @input.seek(pos)
       else
         return false if @samples.empty?
         sample = @samples.first
@@ -684,7 +728,7 @@ class CSV
         UnoptimizedStringIO.new(sample)
       end
       if @input.is_a?(StringIO)
-        inputs << UnoptimizedStringIO.new(@input.string)
+        inputs << UnoptimizedStringIO.new(@input.read)
       else
         inputs << @input
       end
@@ -697,7 +741,7 @@ class CSV
     def build_scanner
       string = nil
       if @samples.empty? and @input.is_a?(StringIO)
-        string = @input.read
+        string = @input.read
       elsif @samples.size == 1 and @input.respond_to?(:eof?) and @input.eof?
         string = @samples[0]
       end

View File

@@ -4,7 +4,7 @@ require "forwardable"
 class CSV
   #
   # A CSV::Row is part Array and part Hash. It retains an order for the fields
   # and allows duplicates just as an Array would, but also allows you to access
   # fields by name just as you could if they were in a Hash.
   #
@@ -13,13 +13,13 @@ class CSV
   #
   class Row
     #
-    # Construct a new CSV::Row from +headers+ and +fields+, which are expected
+    # Constructs a new CSV::Row from +headers+ and +fields+, which are expected
     # to be Arrays. If one Array is shorter than the other, it will be padded
     # with +nil+ objects.
     #
     # The optional +header_row+ parameter can be set to +true+ to indicate, via
     # CSV::Row.header_row?() and CSV::Row.field_row?(), that this is a header
-    # row. Otherwise, the row is assumes to be a field row.
+    # row. Otherwise, the row assumes to be a field row.
     #
     # A CSV::Row object supports the following Array methods through delegation:
     #
@@ -74,11 +74,11 @@ class CSV
     #   field( header, offset )
     #   field( index )
     #
     # This method will return the field value by +header+ or +index+. If a field
     # is not found, +nil+ is returned.
     #
     # When provided, +offset+ ensures that a header match occurs on or later
     # than the +offset+ index. You can use this to find duplicate headers,
     # without resorting to hard-coding exact indices.
     #
     def field(header_or_index, minimum_index = 0)
@@ -142,7 +142,7 @@ class CSV
     # assigns the +value+.
     #
     # Assigning past the end of the row with an index will set all pairs between
     # to <tt>[nil, nil]</tt>. Assigning to an unused header appends the new
     # pair.
     #
     def []=(*args)
@@ -172,8 +172,8 @@ class CSV
     #   <<( header_and_field_hash )
     #
     # If a two-element Array is provided, it is assumed to be a header and field
     # and the pair is appended. A Hash works the same way with the key being
     # the header and the value being the field. Anything else is assumed to be
     # a lone field which is appended with a +nil+ header.
     #
     # This method returns the row for chaining.
@@ -191,7 +191,7 @@ class CSV
     end
     #
     # A shortcut for appending multiple fields. Equivalent to:
     #
     #   args.each { |arg| csv_row << arg }
     #
@@ -209,8 +209,8 @@ class CSV
     #   delete( header, offset )
     #   delete( index )
     #
-    # Used to remove a pair from the row by +header+ or +index+. The pair is
+    # Removes a pair from the row by +header+ or +index+. The pair is
     # located as described in CSV::Row.field(). The deleted pair is returned,
     # or +nil+ if a pair could not be found.
     #
     def delete(header_or_index, minimum_index = 0)
@@ -325,7 +325,7 @@ class CSV
     end
     #
     # Collapses the row into a simple Hash. Be warned that this discards field
     # order and clobbers duplicate fields.
     #
     def to_h
@@ -340,7 +340,7 @@ class CSV
     alias_method :to_ary, :to_a
     #
     # Returns the row as a CSV String. Headers are not used. Equivalent to:
     #
     #   csv_row.fields.to_csv( options )
     #
@@ -367,7 +367,9 @@ class CSV
       end
     end
+    #
     # A summary of fields, by header, in an ASCII compatible String.
+    #
     def inspect
       str = ["#<", self.class.to_s]
       each do |header, field|

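Taken together, the CSV::Row API documented above behaves roughly like this (usage sketch, not part of the diff):

    require "csv"
    row = CSV::Row.new(["name", "name"], ["Alice", "Bob"])
    row.field("name")       # => "Alice"
    row.field("name", 1)    # => "Bob"   (offset skips the first match)
    row << ["email", "alice@example.com"]   # append a header/field pair
    row.fields              # => ["Alice", "Bob", "alice@example.com"]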
View File

@@ -5,7 +5,7 @@ require "forwardable"
 class CSV
   #
   # A CSV::Table is a two-dimensional data structure for representing CSV
   # documents. Tables allow you to work with the data by row or column,
   # manipulate the data, and even convert the results back to CSV, if needed.
   #
   # All tables returned by CSV will be constructed from this class, if header
@@ -13,8 +13,8 @@
   #
   class Table
     #
-    # Construct a new CSV::Table from +array_of_rows+, which are expected
+    # Constructs a new CSV::Table from +array_of_rows+, which are expected
     # to be CSV::Row objects. All rows are assumed to have the same headers.
     #
     # The optional +headers+ parameter can be set to Array of headers.
     # If headers aren't set, headers are fetched from CSV::Row objects.
@@ -55,11 +55,11 @@
     def_delegators :@table, :empty?, :length, :size
     #
     # Returns a duplicate table object, in column mode. This is handy for
     # chaining in a single call without changing the table mode, but be aware
     # that this method can consume a fair amount of memory for bigger data sets.
     #
     # This method returns the duplicate table for chaining. Don't chain
     # destructive methods (like []=()) this way though, since you are working
     # with a duplicate.
     #
@@ -68,7 +68,7 @@
     end
     #
     # Switches the mode of this table to column mode. All calls to indexing and
     # iteration methods will work with columns until the mode is changed again.
     #
     # This method returns the table and is safe to chain.
@@ -80,7 +80,7 @@
     end
     #
     # Returns a duplicate table object, in mixed mode. This is handy for
     # chaining in a single call without changing the table mode, but be aware
     # that this method can consume a fair amount of memory for bigger data sets.
     #
@@ -93,9 +93,9 @@
     end
     #
     # Switches the mode of this table to mixed mode. All calls to indexing and
     # iteration methods will use the default intelligent indexing system until
     # the mode is changed again. In mixed mode an index is assumed to be a row
     # reference while anything else is assumed to be column access by headers.
     #
     # This method returns the table and is safe to chain.
@@ -120,7 +120,7 @@
     end
     #
     # Switches the mode of this table to row mode. All calls to indexing and
     # iteration methods will work with rows until the mode is changed again.
     #
     # This method returns the table and is safe to chain.
@@ -146,7 +146,7 @@
     #
     # In the default mixed mode, this method returns rows for index access and
     # columns for header access. You can force the index association by first
     # calling by_col!() or by_row!().
     #
     # Columns are returned as an Array of values. Altering that Array has no
@@ -163,18 +163,18 @@
     #
     # In the default mixed mode, this method assigns rows for index access and
     # columns for header access. You can force the index association by first
     # calling by_col!() or by_row!().
     #
     # Rows may be set to an Array of values (which will inherit the table's
     # headers()) or a CSV::Row.
     #
     # Columns may be set to a single value, which is copied to each row of the
     # column, or an Array of values. Arrays of values are assigned to rows top
     # to bottom in row major order. Excess values are ignored and if the Array
     # does not have a value for each row the extra rows will receive a +nil+.
     #
     # Assigning to an existing column or row clobbers the data. Assigning to
     # new columns creates them at the right end of the table.
     #
     def []=(index_or_header, value)
@@ -212,9 +212,9 @@
     #
     # The mixed mode default is to treat a list of indices as row access,
     # returning the rows indicated. Anything else is considered columnar
     # access. For columnar access, the return set has an Array for each row
     # with the values indicated by the headers in each Array. You can force
     # column or row mode using by_col!() or by_row!().
     #
     # You cannot mix column and row access.
@@ -234,7 +234,7 @@
     end
     #
     # Adds a new row to the bottom end of this table. You can provide an Array,
     # which will be converted to a CSV::Row (inheriting the table's headers()),
     # or a CSV::Row.
     #
@@ -251,7 +251,7 @@
     end
     #
     # A shortcut for appending multiple rows. Equivalent to:
     #
     #   rows.each { |row| self << row }
     #
@@ -264,9 +264,9 @@
     end
     #
     # Removes and returns the indicated columns or rows. In the default mixed
     # mode indices refer to rows and everything else is assumed to be a column
     # headers. Use by_col!() or by_row!() to force the lookup.
     #
     def delete(*indexes_or_headers)
       if indexes_or_headers.empty?
@@ -293,9 +293,9 @@
     end
     #
     # Removes any column or row for which the block returns +true+. In the
     # default mixed mode or row mode, iteration is the standard row major
     # walking of rows. In column mode, iteration will +yield+ two element
     # tuples containing the column name and an Array of values for that column.
     #
     # This method returns the table for chaining.
@@ -321,7 +321,7 @@
     #
     # In the default mixed mode or row mode, iteration is the standard row major
     # walking of rows. In column mode, iteration will +yield+ two element
     # tuples containing the column name and an Array of values for that column.
     #
     # This method returns the table for chaining.
@@ -347,7 +347,7 @@
     end
     #
     # Returns the table as an Array of Arrays. Headers will be the first row,
     # then all of the field rows will follow.
     #
     def to_a
@@ -360,7 +360,7 @@
     end
     #
     # Returns the table as a complete CSV String. Headers will be listed first,
     # then all of the field rows.
     #
     # This method assumes you want the Table.headers(), unless you explicitly

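A usage sketch of the table modes described above (illustrative, not part of the diff):

    require "csv"
    table = CSV.parse("name,age\nAlice,30\nBob,25\n", headers: true)
    table.by_row[0]        # => #<CSV::Row "name":"Alice" "age":"30">
    table.by_col["name"]   # => ["Alice", "Bob"]
    table[0]               # mixed mode: an Integer index selects a row
    table["age"]           # mixed mode: anything else selects a column => ["30", "25"]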
View File

@@ -2,5 +2,5 @@
 class CSV
   # The version of the installed library.
-  VERSION = "3.1.1"
+  VERSION = "3.1.2"
 end

View File

@@ -6,7 +6,12 @@ require_relative "row"
 using CSV::MatchP if CSV.const_defined?(:MatchP)
 class CSV
+  # Note: Don't use this class directly. This is an internal class.
   class Writer
+    #
+    # A CSV::Writer receives an output, prepares the header, format and output.
+    # It allows us to write new rows in the object and rewind it.
+    #
     attr_reader :lineno
     attr_reader :headers
@@ -22,6 +27,9 @@ class CSV
       @fields_converter = @options[:fields_converter]
     end
+    #
+    # Adds a new row
+    #
     def <<(row)
       case row
       when Row
@@ -47,6 +55,9 @@ class CSV
       self
     end
+    #
+    # Winds back to the beginning
+    #
     def rewind
       @lineno = 0
       @headers = nil if @options[:headers].nil?

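CSV::Writer is internal, but its +<<+ and +rewind+ back the public writing interface; a sketch of that public usage (illustrative, not part of the diff):

    require "csv"
    require "stringio"
    out = StringIO.new
    csv = CSV.new(out, headers: ["a", "b"], write_headers: true)
    csv << [1, 2]                            # routed through CSV::Writer#<<
    csv << CSV::Row.new(["a", "b"], [3, 4])
    out.string   # => "a,b\n1,2\n3,4\n"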
View File

@@ -233,11 +233,21 @@ line,5,jkl
     assert_equal([["a"]], CSV.parse("a\r\n"))
   end
+  def test_seeked_string_io
+    input_with_bom = StringIO.new("\ufeffあ,い,う\r\na,b,c\r\n")
+    input_with_bom.read(3)
+    assert_equal([
+                   ["あ", "い", "う"],
+                   ["a", "b", "c"],
+                 ],
+                 CSV.new(input_with_bom).each.to_a)
+  end
+
   private
-  def assert_parse_errors_out(*args, **options)
+  def assert_parse_errors_out(data, **options)
     assert_raise(CSV::MalformedCSVError) do
       Timeout.timeout(0.2) do
-        CSV.parse(*args, **options)
+        CSV.parse(data, **options)
         fail("Parse didn't error out")
       end
     end

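The new test's read(3) works because the UTF-8 byte order mark is exactly three bytes, so reading it positions the StringIO at the first real field (illustrative):

    "\ufeff".bytesize   # => 3 (EF BB BF)
    io = StringIO.new("\ufeffあ,い,う\r\n")
    io.read(3)          # skip the BOM; CSV.new(io) now starts at "あ"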
View File

@@ -312,12 +312,12 @@ A
   end
   def test_parse_empty
-    assert_equal(CSV::Table.new([], **{}),
+    assert_equal(CSV::Table.new([]),
                  CSV.parse("", headers: true))
   end
   def test_parse_empty_line
-    assert_equal(CSV::Table.new([], **{}),
+    assert_equal(CSV::Table.new([]),
                  CSV.parse("\n", headers: true))
   end

View File

@@ -6,7 +6,7 @@ require_relative "../helper"
 class TestCSVParseRewind < Test::Unit::TestCase
   extend DifferentOFS
-  def parse(data, options={})
+  def parse(data, **options)
     csv = CSV.new(data, **options)
     records = csv.to_a
     csv.rewind

View File

@@ -268,11 +268,11 @@ class TestCSVEncodings < Test::Unit::TestCase
   private
-  def assert_parses(fields, encoding, options = { })
+  def assert_parses(fields, encoding, **options)
     encoding = Encoding.find(encoding) unless encoding.is_a? Encoding
     orig_fields = fields
     fields = encode_ary(fields, encoding)
-    data = ary_to_data(fields, options)
+    data = ary_to_data(fields, **options)
     parsed = CSV.parse(data, **options)
     assert_equal(fields, parsed)
     parsed.flatten.each_with_index do |field, i|
@@ -285,7 +285,9 @@ class TestCSVEncodings < Test::Unit::TestCase
       end
     end
     begin
-      CSV.open(@temp_csv_path, "rb:#{encoding}:#{__ENCODING__}", **options) do |csv|
+      CSV.open(@temp_csv_path,
+               "rb:#{encoding}:#{__ENCODING__}",
+               **options) do |csv|
         csv.each_with_index do |row, i|
           assert_equal(orig_fields[i], row)
         end
@@ -315,7 +317,7 @@ class TestCSVEncodings < Test::Unit::TestCase
     ary.map { |row| row.map { |field| field.encode(encoding) } }
   end
-  def ary_to_data(ary, options = { })
+  def ary_to_data(ary, **options)
     encoding = ary.flatten.first.encoding
     quote_char = (options[:quote_char] || '"').encode(encoding)
     col_sep = (options[:col_sep] || ",").encode(encoding)
@@ -327,9 +329,9 @@ class TestCSVEncodings < Test::Unit::TestCase
     }.join('').encode(encoding)
   end
-  def encode_for_tests(data, options = { })
-    yield ary_to_data(encode_ary(data, "UTF-8"), options)
-    yield ary_to_data(encode_ary(data, "UTF-16BE"), options)
+  def encode_for_tests(data, **options)
+    yield ary_to_data(encode_ary(data, "UTF-8"), **options)
+    yield ary_to_data(encode_ary(data, "UTF-16BE"), **options)
   end
   def each_encoding

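The test-helper signature changes above (options = { } becoming **options) line up with Ruby 2.7's separation of positional and keyword arguments: forwarding an options Hash positionally and then splatting it into CSV.parse triggers deprecation warnings, whereas a keyword-rest parameter forwards cleanly. A minimal sketch of the new style:

    require "csv"
    def parse(data, **options)      # keyword rest instead of a positional Hash
      CSV.parse(data, **options)    # forwards keywords without a conversion warning
    end
    parse("a;b\n", col_sep: ";")    # => [["a", "b"]]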
View File

@@ -52,6 +52,20 @@ line,4,jkl
     assert_equal([",,,", nil], CSV.parse_line(",,,;", col_sep: ";"))
   end
+
+  def test_col_sep_nil
+    assert_raise_with_message(ArgumentError,
+                              ":col_sep must be 1 or more characters: nil") do
+      CSV.parse(@sample_data, col_sep: nil)
+    end
+  end
+
+  def test_col_sep_empty
+    assert_raise_with_message(ArgumentError,
+                              ":col_sep must be 1 or more characters: \"\"") do
+      CSV.parse(@sample_data, col_sep: "")
+    end
+  end
   def test_row_sep
     error = assert_raise(CSV::MalformedCSVError) do
       CSV.parse_line("1,2,3\n,4,5\r\n", row_sep: "\r\n")
@@ -110,10 +124,10 @@ line,4,jkl
   def test_line
     lines = [
-      %Q(abc,def\n),
-      %Q(abc,"d\nef"\n),
-      %Q(abc,"d\r\nef"\n),
-      %Q(abc,"d\ref")
+      %Q(\u{3000}abc,def\n),
+      %Q(\u{3000}abc,"d\nef"\n),
+      %Q(\u{3000}abc,"d\r\nef"\n),
+      %Q(\u{3000}abc,"d\ref")
     ]
     csv = CSV.new(lines.join(''))
     lines.each do |line|

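Prefixing the test lines with a full-width space (\u{3000}) exercises the byteslice fixes above: the raw-line tracking uses byte offsets, and with multibyte data a character-indexed slice over byte positions would return garbage. Roughly, assuming CSV#line returns the raw text of the most recently read row as this test uses it:

    csv = CSV.new("\u{3000}abc,def\n\u{3000}abc,\"d\nef\"\n")
    csv.shift
    csv.line   # => "\u{3000}abc,def\n"  (the raw line just read)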
View File

@@ -205,6 +205,32 @@ module TestCSVWriteGeneral
     assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}].encode("EUC-JP"),
                  generate_line(row))
   end
+
+  def test_encoding_with_default_internal
+    with_default_internal(Encoding::UTF_8) do
+      row = ["あ", "い", "う"].collect {|field| field.encode("EUC-JP")}
+      assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}].encode("EUC-JP"),
+                   generate_line(row, encoding: Encoding::EUC_JP))
+    end
+  end
+
+  def test_with_default_internal
+    with_default_internal(Encoding::UTF_8) do
+      row = ["あ", "い", "う"].collect {|field| field.encode("EUC-JP")}
+      assert_equal(%Q[あ,い,う#{$INPUT_RECORD_SEPARATOR}].encode("EUC-JP"),
+                   generate_line(row))
+    end
+  end
+
+  def with_default_internal(encoding)
+    original = Encoding.default_internal
+    begin
+      Encoding.default_internal = encoding
+      yield
+    ensure
+      Encoding.default_internal = original
+    end
+  end
 end
 class TestCSVWriteGeneralGenerateLine < Test::Unit::TestCase