Bug#40814 CSV engine does not parse \X characters when they occur in unquoted fields
When a .CSV file for a table in the CSV engine contains \X characters (escape sequences) as part of unquoted fields, e.g. 2,naraya\nan, the \n is not interpreted as a newline (it is, however, interpreted as a newline in a quoted field). The old algorithm copied the entire value of an unquoted field without parsing the \X characters. The new algorithm adds the capability to handle \X characters in the unquoted fields of a .CSV file.
This commit is contained in:
parent
187958a951
commit
1aecdd3148
@ -5407,4 +5407,60 @@ test.t1 repair status OK
|
||||
select * from t1 limit 1;
|
||||
a
|
||||
drop table t1;
|
||||
#
|
||||
# Test for the following cases
|
||||
# 1) integers and strings enclosed in quotes
|
||||
# 2) integers and strings not enclosed in quotes
|
||||
# 3) \X characters with quotes
|
||||
# 4) \X characters outside quotes
|
||||
#
|
||||
CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
|
||||
# remove the already existing .CSV file if any
|
||||
# create the .CSV file that contains the hard-coded data used in
|
||||
# testing
|
||||
1,"integer sans quotes"
|
||||
1,string sans quotes
|
||||
1,quotes"in between" strings
|
||||
"1",Integer with quote and string with no quote
|
||||
1,"escape sequence \n \" \\ \r \a within quotes"
|
||||
1,escape sequence \n \" \\ \r \a without quotes
|
||||
# select from the table in which the data has been filled in using
|
||||
# the hard-coded .CSV file
|
||||
SELECT * FROM t1;
|
||||
c1 c2
|
||||
1 integer sans quotes
|
||||
1 string sans quotes
|
||||
1 quotes"in between" strings
|
||||
1 Integer with quote and string with no quote
|
||||
1 escape sequence
|
||||
" \
\a within quotes
|
||||
1 escape sequence
|
||||
" \
\a without quotes
|
||||
DROP TABLE t1;
|
||||
# Test for the case when a field begins with a quote, but does not end in a
|
||||
# quote.
|
||||
# Note: This results in an error.
|
||||
CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
|
||||
# remove the already existing .CSV file if any
|
||||
# create the .CSV file that contains the hard-coded data used in
|
||||
# testing
|
||||
1,"string only at the beginning quotes
|
||||
# select from the table in which the data has been filled in using
|
||||
# the hard-coded .CSV file
|
||||
SELECT * FROM t1;
|
||||
ERROR HY000: Table 't1' is marked as crashed and should be repaired
|
||||
DROP TABLE t1;
|
||||
# Test for the case when a field ends with a quote, but does not begin in a
|
||||
# quote.
|
||||
# Note: This results in an error.
|
||||
CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
|
||||
# remove the already existing .CSV file if any
|
||||
# create the .CSV file that contains the hard-coded data used in
|
||||
# testing
|
||||
1,string with only ending quotes"
|
||||
# select from the table in which the data has been filled in using
|
||||
# the hard-coded .CSV file
|
||||
SELECT * FROM t1;
|
||||
ERROR HY000: Table 't1' is marked as crashed and should be repaired
|
||||
DROP TABLE t1;
|
||||
End of 5.1 tests
|
||||
|
@ -1819,4 +1819,84 @@ repair table t1;
|
||||
select * from t1 limit 1;
|
||||
drop table t1;
|
||||
|
||||
#
|
||||
# Bug #40814 CSV engine does not parse \X characters when they occur in unquoted fields
|
||||
#
|
||||
|
||||
--echo #
|
||||
--echo # Test for the following cases
|
||||
--echo # 1) integers and strings enclosed in quotes
|
||||
--echo # 2) integers and strings not enclosed in quotes
|
||||
--echo # 3) \X characters with quotes
|
||||
--echo # 4) \X characters outside quotes
|
||||
--echo #
|
||||
|
||||
CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
|
||||
|
||||
--echo # remove the already existing .CSV file if any
|
||||
--remove_file $MYSQLD_DATADIR/test/t1.CSV
|
||||
|
||||
--echo # create the .CSV file that contains the hard-coded data used in
|
||||
--echo # testing
|
||||
--write_file $MYSQLD_DATADIR/test/t1.CSV
|
||||
1,"integer sans quotes"
|
||||
1,string sans quotes
|
||||
1,quotes"in between" strings
|
||||
"1",Integer with quote and string with no quote
|
||||
1,"escape sequence \n \" \\ \r \a within quotes"
|
||||
1,escape sequence \n \" \\ \r \a without quotes
|
||||
EOF
|
||||
--cat_file $MYSQLD_DATADIR/test/t1.CSV
|
||||
|
||||
--echo # select from the table in which the data has been filled in using
|
||||
--echo # the hard-coded .CSV file
|
||||
SELECT * FROM t1;
|
||||
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo # Test for the case when a field begins with a quote, but does not end in a
|
||||
--echo # quote.
|
||||
--echo # Note: This results in an error.
|
||||
|
||||
CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
|
||||
|
||||
--echo # remove the already existing .CSV file if any
|
||||
--remove_file $MYSQLD_DATADIR/test/t1.CSV
|
||||
|
||||
--echo # create the .CSV file that contains the hard-coded data used in
|
||||
--echo # testing
|
||||
--write_file $MYSQLD_DATADIR/test/t1.CSV
|
||||
1,"string only at the beginning quotes
|
||||
EOF
|
||||
--cat_file $MYSQLD_DATADIR/test/t1.CSV
|
||||
|
||||
--echo # select from the table in which the data has been filled in using
|
||||
--echo # the hard-coded .CSV file
|
||||
--error ER_CRASHED_ON_USAGE
|
||||
SELECT * FROM t1;
|
||||
|
||||
DROP TABLE t1;
|
||||
|
||||
--echo # Test for the case when a field ends with a quote, but does not begin in a
|
||||
--echo # quote.
|
||||
--echo # Note: This results in an error.
|
||||
|
||||
CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
|
||||
|
||||
--echo # remove the already existing .CSV file if any
|
||||
--remove_file $MYSQLD_DATADIR/test/t1.CSV
|
||||
|
||||
--echo # create the .CSV file that contains the hard-coded data used in
|
||||
--echo # testing
|
||||
--write_file $MYSQLD_DATADIR/test/t1.CSV
|
||||
1,string with only ending quotes"
|
||||
EOF
|
||||
--cat_file $MYSQLD_DATADIR/test/t1.CSV
|
||||
|
||||
--echo # select from the table in which the data has been filled in using
|
||||
--echo # the hard-coded .CSV file
|
||||
--error ER_CRASHED_ON_USAGE
|
||||
SELECT * FROM t1;
|
||||
|
||||
DROP TABLE t1;
|
||||
--echo End of 5.1 tests
|
||||
|
@ -614,6 +614,33 @@ int ha_tina::find_current_row(uchar *buf)
|
||||
|
||||
memset(buf, 0, table->s->null_bytes);
|
||||
|
||||
/*
|
||||
Parse the line obtained using the following algorithm
|
||||
|
||||
BEGIN
|
||||
1) Store the EOL (end of line) for the current row
|
||||
2) While some fields in the current query have not been
|
||||
filled
|
||||
2.1) If the current character is a quote
|
||||
2.1.1) While EOL has not been reached
|
||||
a) If end of current field is reached, move
|
||||
to next field and jump to step 2.3
|
||||
b) If current character is a \\ handle
|
||||
\\n, \\r, \\, \\"
|
||||
c) else append the current character into the buffer
|
||||
before checking that EOL has not been reached.
|
||||
2.2) If the current field does not begin with a quote
|
||||
2.2.1) While EOL has not been reached
|
||||
a) If the end of field has been reached move to the
|
||||
next field and jump to step 2.3
|
||||
b) If current character is a \\ handle
|
||||
\\n, \\r, \\, \\"
|
||||
c) else append the current character into the buffer
|
||||
before checking that EOL has not been reached.
|
||||
2.3) Store the current field value and jump to 2)
|
||||
TERMINATE
|
||||
*/
|
||||
|
||||
for (Field **field=table->field ; *field ; field++)
|
||||
{
|
||||
char curr_char;
|
||||
@ -622,19 +649,23 @@ int ha_tina::find_current_row(uchar *buf)
|
||||
if (curr_offset >= end_offset)
|
||||
goto err;
|
||||
curr_char= file_buff->get_value(curr_offset);
|
||||
/* Handle the case where the first character is a quote */
|
||||
if (curr_char == '"')
|
||||
{
|
||||
curr_offset++; // Incrementpast the first quote
|
||||
/* Increment past the first quote */
|
||||
curr_offset++;
|
||||
|
||||
for(; curr_offset < end_offset; curr_offset++)
|
||||
/* Loop through the row to extract the values for the current field */
|
||||
for ( ; curr_offset < end_offset; curr_offset++)
|
||||
{
|
||||
curr_char= file_buff->get_value(curr_offset);
|
||||
// Need to convert line feeds!
|
||||
/* check for end of the current field */
|
||||
if (curr_char == '"' &&
|
||||
(curr_offset == end_offset - 1 ||
|
||||
file_buff->get_value(curr_offset + 1) == ','))
|
||||
{
|
||||
curr_offset+= 2; // Move past the , and the "
|
||||
/* Move past the , and the " */
|
||||
curr_offset+= 2;
|
||||
break;
|
||||
}
|
||||
if (curr_char == '\\' && curr_offset != (end_offset - 1))
|
||||
@ -656,7 +687,7 @@ int ha_tina::find_current_row(uchar *buf)
|
||||
else // ordinary symbol
|
||||
{
|
||||
/*
|
||||
We are at final symbol and no last quote was found =>
|
||||
If we are at final symbol and no last quote was found =>
|
||||
we are working with a damaged file.
|
||||
*/
|
||||
if (curr_offset == end_offset - 1)
|
||||
@ -667,15 +698,41 @@ int ha_tina::find_current_row(uchar *buf)
|
||||
}
|
||||
else
|
||||
{
|
||||
for(; curr_offset < end_offset; curr_offset++)
|
||||
for ( ; curr_offset < end_offset; curr_offset++)
|
||||
{
|
||||
curr_char= file_buff->get_value(curr_offset);
|
||||
/* Move past the ,*/
|
||||
if (curr_char == ',')
|
||||
{
|
||||
curr_offset++; // Skip the ,
|
||||
curr_offset++;
|
||||
break;
|
||||
}
|
||||
buffer.append(curr_char);
|
||||
if (curr_char == '\\' && curr_offset != (end_offset - 1))
|
||||
{
|
||||
curr_offset++;
|
||||
curr_char= file_buff->get_value(curr_offset);
|
||||
if (curr_char == 'r')
|
||||
buffer.append('\r');
|
||||
else if (curr_char == 'n' )
|
||||
buffer.append('\n');
|
||||
else if (curr_char == '\\' || curr_char == '"')
|
||||
buffer.append(curr_char);
|
||||
else /* This could only happen with an externally created file */
|
||||
{
|
||||
buffer.append('\\');
|
||||
buffer.append(curr_char);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
/*
|
||||
We are at the final symbol and a quote was found for the
|
||||
unquoted field => We are working with a damaged field.
|
||||
*/
|
||||
if (curr_offset == end_offset - 1 && curr_char == '"')
|
||||
goto err;
|
||||
buffer.append(curr_char);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user