diff --git a/mysql-test/r/csv.result b/mysql-test/r/csv.result index 4b96f5a5ed0..97996b484bb 100644 --- a/mysql-test/r/csv.result +++ b/mysql-test/r/csv.result @@ -5407,4 +5407,60 @@ test.t1 repair status OK select * from t1 limit 1; a drop table t1; +# +# Test for the following cases +# 1) integers and strings enclosed in quotes +# 2) integers and strings not enclosed in quotes +# 3) \X characters with quotes +# 4) \X characters outside quotes +# +CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv; +# remove the already existing .CSV file if any +# create the .CSV file that contains the hard-coded data used in +# testing +1,"integer sans quotes" +1,string sans quotes +1,quotes"in between" strings +"1",Integer with quote and string with no quote +1,"escape sequence \n \" \\ \r \a within quotes" +1,escape sequence \n \" \\ \r \a without quotes +# select from the table in which the data has been filled in using +# the hard-coded .CSV file +SELECT * FROM t1; +c1 c2 +1 integer sans quotes +1 string sans quotes +1 quotes"in between" strings +1 Integer with quote and string with no quote +1 escape sequence + " \ \a within quotes +1 escape sequence + " \ \a without quotes +DROP TABLE t1; +# Test for the case when a field begins with a quote, but does not end in a +# quote. +# Note: This results in an error. +CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv; +# remove the already existing .CSV file if any +# create the .CSV file that contains the hard-coded data used in +# testing +1,"string only at the beginning quotes +# select from the table in which the data has been filled in using +# the hard-coded .CSV file +SELECT * FROM t1; +ERROR HY000: Table 't1' is marked as crashed and should be repaired +DROP TABLE t1; +# Test for the case when a field ends with a quote, but does not begin in a +# quote. +# Note: This results in an error. 
+CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv; +# remove the already existing .CSV file if any +# create the .CSV file that contains the hard-coded data used in +# testing +1,string with only ending quotes" +# select from the table in which the data has been filled in using +# the hard-coded .CSV file +SELECT * FROM t1; +ERROR HY000: Table 't1' is marked as crashed and should be repaired +DROP TABLE t1; End of 5.1 tests diff --git a/mysql-test/t/csv.test b/mysql-test/t/csv.test index cdf274190dd..ea949f463c9 100644 --- a/mysql-test/t/csv.test +++ b/mysql-test/t/csv.test @@ -1819,4 +1819,84 @@ repair table t1; select * from t1 limit 1; drop table t1; +# +# Bug #40814 CSV engine does not parse \X characters when they occur in unquoted fields +# + +--echo # +--echo # Test for the following cases +--echo # 1) integers and strings enclosed in quotes +--echo # 2) integers and strings not enclosed in quotes +--echo # 3) \X characters with quotes +--echo # 4) \X characters outside quotes +--echo # + +CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv; + +--echo # remove the already existing .CSV file if any +--remove_file $MYSQLD_DATADIR/test/t1.CSV + +--echo # create the .CSV file that contains the hard-coded data used in +--echo # testing +--write_file $MYSQLD_DATADIR/test/t1.CSV +1,"integer sans quotes" +1,string sans quotes +1,quotes"in between" strings +"1",Integer with quote and string with no quote +1,"escape sequence \n \" \\ \r \a within quotes" +1,escape sequence \n \" \\ \r \a without quotes +EOF +--cat_file $MYSQLD_DATADIR/test/t1.CSV + +--echo # select from the table in which the data has been filled in using +--echo # the hard-coded .CSV file +SELECT * FROM t1; + +DROP TABLE t1; + +--echo # Test for the case when a field begins with a quote, but does not end in a +--echo # quote. +--echo # Note: This results in an error. 
+ +CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv; + +--echo # remove the already existing .CSV file if any +--remove_file $MYSQLD_DATADIR/test/t1.CSV + +--echo # create the .CSV file that contains the hard-coded data used in +--echo # testing +--write_file $MYSQLD_DATADIR/test/t1.CSV +1,"string only at the beginning quotes +EOF +--cat_file $MYSQLD_DATADIR/test/t1.CSV + +--echo # select from the table in which the data has been filled in using +--echo # the hard-coded .CSV file +--error ER_CRASHED_ON_USAGE +SELECT * FROM t1; + +DROP TABLE t1; + +--echo # Test for the case when a field ends with a quote, but does not begin in a +--echo # quote. +--echo # Note: This results in an error. + +CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv; + +--echo # remove the already existing .CSV file if any +--remove_file $MYSQLD_DATADIR/test/t1.CSV + +--echo # create the .CSV file that contains the hard-coded data used in +--echo # testing +--write_file $MYSQLD_DATADIR/test/t1.CSV +1,string with only ending quotes" +EOF +--cat_file $MYSQLD_DATADIR/test/t1.CSV + +--echo # select from the table in which the data has been filled in using +--echo # the hard-coded .CSV file +--error ER_CRASHED_ON_USAGE +SELECT * FROM t1; + +DROP TABLE t1; --echo End of 5.1 tests diff --git a/storage/csv/ha_tina.cc b/storage/csv/ha_tina.cc index 9cc0f1e607b..fac78986563 100644 --- a/storage/csv/ha_tina.cc +++ b/storage/csv/ha_tina.cc @@ -614,6 +614,33 @@ int ha_tina::find_current_row(uchar *buf) memset(buf, 0, table->s->null_bytes); + /* + Parse the line obtained using the following algorithm + + BEGIN + 1) Store the EOL (end of line) for the current row + 2) Until all the fields in the current query have not been + filled + 2.1) If the current character is a quote + 2.1.1) Until EOL has not been reached + a) If end of current field is reached, move + to next field and jump to step 2.3 + b) If current character is a \\ handle + \\n, \\r, \\, \\" + c) else 
append the current character into the buffer + before checking that EOL has not been reached. + 2.2) If the current character is not a quote + 2.2.1) Until EOL has not been reached + a) If the end of field has been reached move to the + next field and jump to step 2.3 + b) If current character is a \\ handle + \\n, \\r, \\, \\" + c) else append the current character into the buffer + before checking that EOL has not been reached. + 2.3) Store the current field value and jump to 2) + TERMINATE + */ + for (Field **field=table->field ; *field ; field++) { char curr_char; @@ -622,19 +649,23 @@ int ha_tina::find_current_row(uchar *buf) if (curr_offset >= end_offset) goto err; curr_char= file_buff->get_value(curr_offset); + /* Handle the case where the first character is a quote */ if (curr_char == '"') { - curr_offset++; // Incrementpast the first quote + /* Increment past the first quote */ + curr_offset++; - for(; curr_offset < end_offset; curr_offset++) + /* Loop through the row to extract the values for the current field */ + for ( ; curr_offset < end_offset; curr_offset++) { curr_char= file_buff->get_value(curr_offset); - // Need to convert line feeds! + /* check for end of the current field */ if (curr_char == '"' && (curr_offset == end_offset - 1 || file_buff->get_value(curr_offset + 1) == ',')) { - curr_offset+= 2; // Move past the , and the " + /* Move past the , and the " */ + curr_offset+= 2; break; } if (curr_char == '\\' && curr_offset != (end_offset - 1)) @@ -656,7 +687,7 @@ int ha_tina::find_current_row(uchar *buf) else // ordinary symbol { /* - We are at final symbol and no last quote was found => + If we are at final symbol and no last quote was found => we are working with a damaged file.
*/ if (curr_offset == end_offset - 1) @@ -667,15 +698,41 @@ int ha_tina::find_current_row(uchar *buf) } else { - for(; curr_offset < end_offset; curr_offset++) + for ( ; curr_offset < end_offset; curr_offset++) { curr_char= file_buff->get_value(curr_offset); + /* Move past the ,*/ if (curr_char == ',') { - curr_offset++; // Skip the , + curr_offset++; break; } - buffer.append(curr_char); + if (curr_char == '\\' && curr_offset != (end_offset - 1)) + { + curr_offset++; + curr_char= file_buff->get_value(curr_offset); + if (curr_char == 'r') + buffer.append('\r'); + else if (curr_char == 'n' ) + buffer.append('\n'); + else if (curr_char == '\\' || curr_char == '"') + buffer.append(curr_char); + else /* This could only happen with an externally created file */ + { + buffer.append('\\'); + buffer.append(curr_char); + } + } + else + { + /* + We are at the final symbol and a quote was found for the + unquoted field => We are working with a damaged field. + */ + if (curr_offset == end_offset - 1 && curr_char == '"') + goto err; + buffer.append(curr_char); + } } }