From 37abc2fb7ed77fd3063f0ac8fcadd55b80526f4b Mon Sep 17 00:00:00 2001 From: hsbt Date: Thu, 18 May 2017 02:42:16 +0000 Subject: [PATCH] Improve CSV parsing performance. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Patch by @joshpencheon (Josh Pencheon) [fix GH-1607] #### benchmark-ips results ``` trunk: Warming up -------------------------------------- 4.000 i/100ms Calculating ------------------------------------- 39.661 (±10.1%) i/s - 2.352k in 60.034781s with-patch: Warming up -------------------------------------- 5.000 i/100ms Calculating ------------------------------------- 60.521 (± 9.9%) i/s - 3.595k in 60.047157s ``` #### memory_profiler results ``` trunk: allocated memory by class ----------------------------------- 35588490 String 7454320 Array 294000 MatchData 37340 Regexp 11840 Hash 2400 CSV 1600 Proc 1280 Method 800 StringIO with-patch: allocated memory by class ----------------------------------- 18788490 String 3454320 Array 294000 MatchData 37340 Regexp 11840 Hash 2400 CSV 1600 Proc 1280 Method 800 StringIO ``` git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@58777 b2dd03c8-39d4-4d8f-98ff-823fe69b080e --- lib/csv.rb | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lib/csv.rb b/lib/csv.rb index e7aedc4558..9a861c7327 100644 --- a/lib/csv.rb +++ b/lib/csv.rb @@ -1876,7 +1876,7 @@ class CSV # If we are continuing a previous column if part.end_with?(@quote_char) && part.count(@quote_char) % 2 != 0 # extended column ends - csv[-1] = csv[-1].push(part[0..-2]).join("") + csv.last << part[0..-2] if csv.last =~ @parsers[:stray_quote] raise MalformedCSVError, "Missing or stray quote in line #{lineno + 1}" @@ -1884,13 +1884,13 @@ class CSV csv.last.gsub!(@double_quote_char, @quote_char) in_extended_col = false else - csv.last.push(part, @col_sep) + csv.last << part << @col_sep end elsif part.start_with?(@quote_char) # If we are starting a new quoted column if part.count(@quote_char) % 2 
!= 0 # start an extended column - csv << [part[1..-1], @col_sep] + csv << (part[1..-1] << @col_sep) in_extended_col = true elsif part.end_with?(@quote_char) # regular quoted column @@ -1933,7 +1933,7 @@ class CSV if @io.eof? raise MalformedCSVError, "Unclosed quoted field on line #{lineno + 1}." - elsif @field_size_limit and csv.last.sum(&:size) >= @field_size_limit + elsif @field_size_limit and csv.last.size >= @field_size_limit raise MalformedCSVError, "Field size exceeded on line #{lineno + 1}." end # otherwise, we need to loop and pull some more data to complete the row