Bug#40814 CSV engine does not parse \X characters when they occur in unquoted fields
When a .CSV file for a table in the CSV engine contains \X characters as part of unquoted fields, e.g. "2,naraya\nan", the \n is not interpreted as a newline (it is, however, interpreted as a newline in a quoted field). The old algorithm copied the entire value of an unquoted field without parsing the \X characters. The new algorithm adds the capability to handle \X characters in the unquoted fields of a .CSV file.
This commit is contained in:
parent
187958a951
commit
1aecdd3148
@ -5407,4 +5407,60 @@ test.t1 repair status OK
|
|||||||
select * from t1 limit 1;
|
select * from t1 limit 1;
|
||||||
a
|
a
|
||||||
drop table t1;
|
drop table t1;
|
||||||
|
#
|
||||||
|
# Test for the following cases
|
||||||
|
# 1) integers and strings enclosed in quotes
|
||||||
|
# 2) integers and strings not enclosed in quotes
|
||||||
|
# 3) \X characters with quotes
|
||||||
|
# 4) \X characters outside quotes
|
||||||
|
#
|
||||||
|
CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
|
||||||
|
# remove the already existing .CSV file if any
|
||||||
|
# create the .CSV file that contains the hard-coded data used in
|
||||||
|
# testing
|
||||||
|
1,"integer sans quotes"
|
||||||
|
1,string sans quotes
|
||||||
|
1,quotes"in between" strings
|
||||||
|
"1",Integer with quote and string with no quote
|
||||||
|
1,"escape sequence \n \" \\ \r \a within quotes"
|
||||||
|
1,escape sequence \n \" \\ \r \a without quotes
|
||||||
|
# select from the table in which the data has been filled in using
|
||||||
|
# the hard-coded .CSV file
|
||||||
|
SELECT * FROM t1;
|
||||||
|
c1 c2
|
||||||
|
1 integer sans quotes
|
||||||
|
1 string sans quotes
|
||||||
|
1 quotes"in between" strings
|
||||||
|
1 Integer with quote and string with no quote
|
||||||
|
1 escape sequence
|
||||||
|
" \
\a within quotes
|
||||||
|
1 escape sequence
|
||||||
|
" \
\a without quotes
|
||||||
|
DROP TABLE t1;
|
||||||
|
# Test for the case when a field begins with a quote, but does not end in a
|
||||||
|
# quote.
|
||||||
|
# Note: This results in an error.
|
||||||
|
CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
|
||||||
|
# remove the already existing .CSV file if any
|
||||||
|
# create the .CSV file that contains the hard-coded data used in
|
||||||
|
# testing
|
||||||
|
1,"string only at the beginning quotes
|
||||||
|
# select from the table in which the data has been filled in using
|
||||||
|
# the hard-coded .CSV file
|
||||||
|
SELECT * FROM t1;
|
||||||
|
ERROR HY000: Table 't1' is marked as crashed and should be repaired
|
||||||
|
DROP TABLE t1;
|
||||||
|
# Test for the case when a field ends with a quote, but does not begin in a
|
||||||
|
# quote.
|
||||||
|
# Note: This results in an error.
|
||||||
|
CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
|
||||||
|
# remove the already existing .CSV file if any
|
||||||
|
# create the .CSV file that contains the hard-coded data used in
|
||||||
|
# testing
|
||||||
|
1,string with only ending quotes"
|
||||||
|
# select from the table in which the data has been filled in using
|
||||||
|
# the hard-coded .CSV file
|
||||||
|
SELECT * FROM t1;
|
||||||
|
ERROR HY000: Table 't1' is marked as crashed and should be repaired
|
||||||
|
DROP TABLE t1;
|
||||||
End of 5.1 tests
|
End of 5.1 tests
|
||||||
|
@ -1819,4 +1819,84 @@ repair table t1;
|
|||||||
select * from t1 limit 1;
|
select * from t1 limit 1;
|
||||||
drop table t1;
|
drop table t1;
|
||||||
|
|
||||||
|
#
|
||||||
|
# Bug #40814 CSV engine does not parse \X characters when they occur in unquoted fields
|
||||||
|
#
|
||||||
|
|
||||||
|
--echo #
|
||||||
|
--echo # Test for the following cases
|
||||||
|
--echo # 1) integers and strings enclosed in quotes
|
||||||
|
--echo # 2) integers and strings not enclosed in quotes
|
||||||
|
--echo # 3) \X characters with quotes
|
||||||
|
--echo # 4) \X characters outside quotes
|
||||||
|
--echo #
|
||||||
|
|
||||||
|
CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
|
||||||
|
|
||||||
|
--echo # remove the already existing .CSV file if any
|
||||||
|
--remove_file $MYSQLD_DATADIR/test/t1.CSV
|
||||||
|
|
||||||
|
--echo # create the .CSV file that contains the hard-coded data used in
|
||||||
|
--echo # testing
|
||||||
|
--write_file $MYSQLD_DATADIR/test/t1.CSV
|
||||||
|
1,"integer sans quotes"
|
||||||
|
1,string sans quotes
|
||||||
|
1,quotes"in between" strings
|
||||||
|
"1",Integer with quote and string with no quote
|
||||||
|
1,"escape sequence \n \" \\ \r \a within quotes"
|
||||||
|
1,escape sequence \n \" \\ \r \a without quotes
|
||||||
|
EOF
|
||||||
|
--cat_file $MYSQLD_DATADIR/test/t1.CSV
|
||||||
|
|
||||||
|
--echo # select from the table in which the data has been filled in using
|
||||||
|
--echo # the hard-coded .CSV file
|
||||||
|
SELECT * FROM t1;
|
||||||
|
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
--echo # Test for the case when a field begins with a quote, but does not end in a
|
||||||
|
--echo # quote.
|
||||||
|
--echo # Note: This results in an error.
|
||||||
|
|
||||||
|
CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
|
||||||
|
|
||||||
|
--echo # remove the already existing .CSV file if any
|
||||||
|
--remove_file $MYSQLD_DATADIR/test/t1.CSV
|
||||||
|
|
||||||
|
--echo # create the .CSV file that contains the hard-coded data used in
|
||||||
|
--echo # testing
|
||||||
|
--write_file $MYSQLD_DATADIR/test/t1.CSV
|
||||||
|
1,"string only at the beginning quotes
|
||||||
|
EOF
|
||||||
|
--cat_file $MYSQLD_DATADIR/test/t1.CSV
|
||||||
|
|
||||||
|
--echo # select from the table in which the data has been filled in using
|
||||||
|
--echo # the hard-coded .CSV file
|
||||||
|
--error ER_CRASHED_ON_USAGE
|
||||||
|
SELECT * FROM t1;
|
||||||
|
|
||||||
|
DROP TABLE t1;
|
||||||
|
|
||||||
|
--echo # Test for the case when a field ends with a quote, but does not begin in a
|
||||||
|
--echo # quote.
|
||||||
|
--echo # Note: This results in an error.
|
||||||
|
|
||||||
|
CREATE TABLE t1(c1 INT NOT NULL, c2 VARCHAR(50) NOT NULL) ENGINE=csv;
|
||||||
|
|
||||||
|
--echo # remove the already existing .CSV file if any
|
||||||
|
--remove_file $MYSQLD_DATADIR/test/t1.CSV
|
||||||
|
|
||||||
|
--echo # create the .CSV file that contains the hard-coded data used in
|
||||||
|
--echo # testing
|
||||||
|
--write_file $MYSQLD_DATADIR/test/t1.CSV
|
||||||
|
1,string with only ending quotes"
|
||||||
|
EOF
|
||||||
|
--cat_file $MYSQLD_DATADIR/test/t1.CSV
|
||||||
|
|
||||||
|
--echo # select from the table in which the data has been filled in using
|
||||||
|
--echo # the hard-coded .CSV file
|
||||||
|
--error ER_CRASHED_ON_USAGE
|
||||||
|
SELECT * FROM t1;
|
||||||
|
|
||||||
|
DROP TABLE t1;
|
||||||
--echo End of 5.1 tests
|
--echo End of 5.1 tests
|
||||||
|
@ -614,6 +614,33 @@ int ha_tina::find_current_row(uchar *buf)
|
|||||||
|
|
||||||
memset(buf, 0, table->s->null_bytes);
|
memset(buf, 0, table->s->null_bytes);
|
||||||
|
|
||||||
|
/*
|
||||||
|
Parse the line obtained using the following algorithm
|
||||||
|
|
||||||
|
BEGIN
|
||||||
|
1) Store the EOL (end of line) for the current row
|
||||||
|
2) While not all the fields in the current query have been
|
||||||
|
filled
|
||||||
|
2.1) If the current character is a quote
|
||||||
|
2.1.1) While EOL has not been reached
|
||||||
|
a) If end of current field is reached, move
|
||||||
|
to next field and jump to step 2.3
|
||||||
|
b) If current character is a \\ handle
|
||||||
|
\\n, \\r, \\, \\"
|
||||||
|
c) else append the current character into the buffer
|
||||||
|
before checking that EOL has not been reached.
|
||||||
|
2.2) If the current character is not a quote
|
||||||
|
2.2.1) While EOL has not been reached
|
||||||
|
a) If the end of field has been reached move to the
|
||||||
|
next field and jump to step 2.3
|
||||||
|
b) If current character is a \\ handle
|
||||||
|
\\n, \\r, \\, \\"
|
||||||
|
c) else append the current character into the buffer
|
||||||
|
before checking that EOL has not been reached.
|
||||||
|
2.3) Store the current field value and jump to 2)
|
||||||
|
TERMINATE
|
||||||
|
*/
|
||||||
|
|
||||||
for (Field **field=table->field ; *field ; field++)
|
for (Field **field=table->field ; *field ; field++)
|
||||||
{
|
{
|
||||||
char curr_char;
|
char curr_char;
|
||||||
@ -622,19 +649,23 @@ int ha_tina::find_current_row(uchar *buf)
|
|||||||
if (curr_offset >= end_offset)
|
if (curr_offset >= end_offset)
|
||||||
goto err;
|
goto err;
|
||||||
curr_char= file_buff->get_value(curr_offset);
|
curr_char= file_buff->get_value(curr_offset);
|
||||||
|
/* Handle the case where the first character is a quote */
|
||||||
if (curr_char == '"')
|
if (curr_char == '"')
|
||||||
{
|
{
|
||||||
curr_offset++; // Incrementpast the first quote
|
/* Increment past the first quote */
|
||||||
|
curr_offset++;
|
||||||
|
|
||||||
|
/* Loop through the row to extract the values for the current field */
|
||||||
for ( ; curr_offset < end_offset; curr_offset++)
|
for ( ; curr_offset < end_offset; curr_offset++)
|
||||||
{
|
{
|
||||||
curr_char= file_buff->get_value(curr_offset);
|
curr_char= file_buff->get_value(curr_offset);
|
||||||
// Need to convert line feeds!
|
/* check for end of the current field */
|
||||||
if (curr_char == '"' &&
|
if (curr_char == '"' &&
|
||||||
(curr_offset == end_offset - 1 ||
|
(curr_offset == end_offset - 1 ||
|
||||||
file_buff->get_value(curr_offset + 1) == ','))
|
file_buff->get_value(curr_offset + 1) == ','))
|
||||||
{
|
{
|
||||||
curr_offset+= 2; // Move past the , and the "
|
/* Move past the , and the " */
|
||||||
|
curr_offset+= 2;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (curr_char == '\\' && curr_offset != (end_offset - 1))
|
if (curr_char == '\\' && curr_offset != (end_offset - 1))
|
||||||
@ -656,7 +687,7 @@ int ha_tina::find_current_row(uchar *buf)
|
|||||||
else // ordinary symbol
|
else // ordinary symbol
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
We are at final symbol and no last quote was found =>
|
If we are at final symbol and no last quote was found =>
|
||||||
we are working with a damaged file.
|
we are working with a damaged file.
|
||||||
*/
|
*/
|
||||||
if (curr_offset == end_offset - 1)
|
if (curr_offset == end_offset - 1)
|
||||||
@ -670,12 +701,38 @@ int ha_tina::find_current_row(uchar *buf)
|
|||||||
for ( ; curr_offset < end_offset; curr_offset++)
|
for ( ; curr_offset < end_offset; curr_offset++)
|
||||||
{
|
{
|
||||||
curr_char= file_buff->get_value(curr_offset);
|
curr_char= file_buff->get_value(curr_offset);
|
||||||
|
/* Move past the ,*/
|
||||||
if (curr_char == ',')
|
if (curr_char == ',')
|
||||||
{
|
{
|
||||||
curr_offset++; // Skip the ,
|
curr_offset++;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
if (curr_char == '\\' && curr_offset != (end_offset - 1))
|
||||||
|
{
|
||||||
|
curr_offset++;
|
||||||
|
curr_char= file_buff->get_value(curr_offset);
|
||||||
|
if (curr_char == 'r')
|
||||||
|
buffer.append('\r');
|
||||||
|
else if (curr_char == 'n' )
|
||||||
|
buffer.append('\n');
|
||||||
|
else if (curr_char == '\\' || curr_char == '"')
|
||||||
buffer.append(curr_char);
|
buffer.append(curr_char);
|
||||||
|
else /* This could only happen with an externally created file */
|
||||||
|
{
|
||||||
|
buffer.append('\\');
|
||||||
|
buffer.append(curr_char);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
We are at the final symbol and a quote was found for the
|
||||||
|
unquoted field => We are working with a damaged field.
|
||||||
|
*/
|
||||||
|
if (curr_offset == end_offset - 1 && curr_char == '"')
|
||||||
|
goto err;
|
||||||
|
buffer.append(curr_char);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user