From 926106649f83984623c21910a2ce3ad982233f3e Mon Sep 17 00:00:00 2001 From: "kroki/tomash@moonlight.intranet" <> Date: Mon, 18 Sep 2006 22:02:06 +0400 Subject: [PATCH 1/8] BUG#9678: Client library hangs after network communication failure (back-port to 4.0) Socket timeouts in client library were used only on Windows. Additionally, in 4.0 write operations erroneously set read timeout. The solution is to use socket timeouts in client library on all systems were they are supported, and to differentiate between read and write timeouts. No test case is provided because it is impossible to simulate network failure in current test suite. --- include/violite.h | 6 +++--- libmysqld/lib_vio.c | 1 + sql/net_serv.cc | 4 ++-- vio/viosocket.c | 26 ++++++++++++++++++++------ 4 files changed, 26 insertions(+), 11 deletions(-) diff --git a/include/violite.h b/include/violite.h index 23fd694d355..ee19cbd62fe 100644 --- a/include/violite.h +++ b/include/violite.h @@ -77,7 +77,7 @@ my_bool vio_peer_addr(Vio* vio, char *buf, uint16 *port); /* Remotes in_addr */ void vio_in_addr(Vio *vio, struct in_addr *in); my_bool vio_poll_read(Vio *vio,uint timeout); -void vio_timeout(Vio *vio,uint timeout); +void vio_timeout(Vio *vio, uint which, uint timeout); #ifdef HAVE_OPENSSL #include @@ -140,7 +140,7 @@ Vio *new_VioSSL(struct st_VioSSLAcceptorFd *fd, Vio *sd, int state); #define vio_close(vio) ((vio)->vioclose)(vio) #define vio_peer_addr(vio, buf, prt) (vio)->peer_addr(vio, buf, prt) #define vio_in_addr(vio, in) (vio)->in_addr(vio, in) -#define vio_timeout(vio, seconds) (vio)->timeout(vio, seconds) +#define vio_timeout(vio, which, seconds) (vio)->timeout(vio, which, seconds) #endif /* defined(HAVE_VIO) && !defined(DONT_MAP_VIO) */ /* This enumerator is used in parser - should be always visible */ @@ -180,7 +180,7 @@ struct st_vio my_bool (*should_retry)(Vio*); my_bool (*was_interrupted)(Vio*); int (*vioclose)(Vio*); - void (*timeout)(Vio*, unsigned int timeout); + void (*timeout)(Vio*, unsigned int which, unsigned int timeout); void *ssl_arg; #endif /* HAVE_VIO */ }; diff --git a/libmysqld/lib_vio.c b/libmysqld/lib_vio.c index 0f5f7fda550..883080e8b92 100644 --- a/libmysqld/lib_vio.c +++ b/libmysqld/lib_vio.c @@ -229,6 +229,7 @@ my_bool vio_poll_read(Vio *vio,uint timeout) void vio_timeout(Vio *vio __attribute__((unused)), + uint which __attribute__((unused)), uint timeout __attribute__((unused))) { } diff --git a/sql/net_serv.cc b/sql/net_serv.cc index 1e66bfc3e19..af54dd6b2f0 100644 --- a/sql/net_serv.cc +++ b/sql/net_serv.cc @@ -436,7 +436,7 @@ net_real_write(NET *net,const char *packet,ulong len) thr_alarm(&alarmed,(uint) net->write_timeout,&alarm_buff); #else alarmed=0; - vio_timeout(net->vio, net->write_timeout); + vio_timeout(net->vio, 1, net->write_timeout); #endif /* NO_ALARM */ pos=(char*) packet; end=pos+len; @@ -627,7 +627,7 @@ my_real_read(NET *net, ulong *complen) if (net_blocking) thr_alarm(&alarmed,net->read_timeout,&alarm_buff); #else - vio_timeout(net->vio, net->read_timeout); + vio_timeout(net->vio, 0, net->read_timeout); #endif /* NO_ALARM */ pos = net->buff + net->where_b; /* net->packet -4 */ diff --git a/vio/viosocket.c b/vio/viosocket.c index 7ea130c9949..2738f7743dc 100644 --- a/vio/viosocket.c +++ b/vio/viosocket.c @@ -345,12 +345,26 @@ my_bool vio_poll_read(Vio *vio,uint timeout) } -void vio_timeout(Vio *vio __attribute__((unused)), - uint timeout __attribute__((unused))) +void vio_timeout(Vio *vio, uint which, uint timeout) { +#if defined(SO_SNDTIMEO) && defined(SO_RCVTIMEO) + #ifdef __WIN__ - ulong wait_timeout= (ulong) timeout * 1000; - (void) setsockopt(vio->sd, SOL_SOCKET, SO_RCVTIMEO, (char*) &wait_timeout, - sizeof(wait_timeout)); -#endif /* __WIN__ */ + + /* Windows expects time in milliseconds as int. */ + int wait_timeout= (int) timeout * 1000; + +#else /* ! __WIN__ */ + + /* POSIX specifies time as struct timeval. */ + struct timeval wait_timeout; + wait_timeout.tv_sec= timeout; + wait_timeout.tv_usec= 0; + +#endif /* ! __WIN__ */ + + (void) setsockopt(vio->sd, SOL_SOCKET, which ? SO_SNDTIMEO : SO_RCVTIMEO, + (char*) &wait_timeout, sizeof(wait_timeout)); + +#endif /* defined(SO_SNDTIMEO) && defined(SO_RCVTIMEO) */ } From 8190b7231b14a4d8773730405edeb7fb07784650 Mon Sep 17 00:00:00 2001 From: "holyfoot/hf@mysql.com/deer.(none)" <> Date: Fri, 29 Sep 2006 17:56:02 +0500 Subject: [PATCH 2/8] bug #21888 (Query on GEOMETRY field crashes the server) RTree keys are really different from BTree and need specific paramters to be set by optimizer to work. Sometimes optimizer doesn't set those properly. Here we decided just to add code to check that the parameters are correct. Hope to fix optimizer sometimes. --- myisam/mi_range.c | 15 +++++++++++++++ mysql-test/r/gis-rtree.result | 11 +++++++++++ mysql-test/t/gis-rtree.test | 10 ++++++++++ 3 files changed, 36 insertions(+) diff --git a/myisam/mi_range.c b/myisam/mi_range.c index 1e0fd42334e..de042845d1e 100644 --- a/myisam/mi_range.c +++ b/myisam/mi_range.c @@ -71,6 +71,21 @@ ha_rows mi_records_in_range(MI_INFO *info, int inx, key_range *min_key, uchar * key_buff; uint start_key_len; + /* + The problem is that the optimizer doesn't support + RTree keys properly at the moment. + Hope this will be fixed some day. + But now NULL in the min_key means that we + didn't make the task for the RTree key + and expect BTree functionality from it. + As it's not able to handle such request + we return the error. + */ + if (!min_key) + { + res= HA_POS_ERROR; + break; + } key_buff= info->lastkey+info->s->base.max_key_length; start_key_len= _mi_pack_key(info,inx, key_buff, (uchar*) min_key->key, min_key->length, diff --git a/mysql-test/r/gis-rtree.result b/mysql-test/r/gis-rtree.result index 3fcae8843c0..bdb3de4de75 100644 --- a/mysql-test/r/gis-rtree.result +++ b/mysql-test/r/gis-rtree.result @@ -857,3 +857,14 @@ CHECK TABLE t1 EXTENDED; Table Op Msg_type Msg_text test.t1 check status OK DROP TABLE t1; +CREATE TABLE t1 (foo GEOMETRY NOT NULL, SPATIAL INDEX(foo) ); +INSERT INTO t1 (foo) VALUES (PointFromWKB(POINT(1,1))); +INSERT INTO t1 (foo) VALUES (PointFromWKB(POINT(1,0))); +INSERT INTO t1 (foo) VALUES (PointFromWKB(POINT(0,1))); +INSERT INTO t1 (foo) VALUES (PointFromWKB(POINT(0,0))); +SELECT 1 FROM t1 WHERE foo != PointFromWKB(POINT(0,0)); +1 +1 +1 +1 +DROP TABLE t1; diff --git a/mysql-test/t/gis-rtree.test b/mysql-test/t/gis-rtree.test index eba53a8a9c5..cdd8d1f3f0f 100644 --- a/mysql-test/t/gis-rtree.test +++ b/mysql-test/t/gis-rtree.test @@ -232,4 +232,14 @@ INSERT INTO t1 (c1) VALUES ( CHECK TABLE t1 EXTENDED; DROP TABLE t1; +# +# Bug #21888: Query on GEOMETRY field using PointFromWKB() results in lost connection +# +CREATE TABLE t1 (foo GEOMETRY NOT NULL, SPATIAL INDEX(foo) ); +INSERT INTO t1 (foo) VALUES (PointFromWKB(POINT(1,1))); +INSERT INTO t1 (foo) VALUES (PointFromWKB(POINT(1,0))); +INSERT INTO t1 (foo) VALUES (PointFromWKB(POINT(0,1))); +INSERT INTO t1 (foo) VALUES (PointFromWKB(POINT(0,0))); +SELECT 1 FROM t1 WHERE foo != PointFromWKB(POINT(0,0)); +DROP TABLE t1; # End of 4.1 tests From 0c9f941bb26da9e24ad2f59b4308333d6fb7fc78 Mon Sep 17 00:00:00 2001 From: "evgen@moonbone.local" <> Date: Fri, 29 Sep 2006 20:02:53 +0400 Subject: [PATCH 3/8] Fixed bug#20825: rollup puts non-equal values together Fix for bug 7894 replaces a field(s) in a non-aggregate function with a item reference if such a field was specified in the GROUP BY clause in order to get a correct result. When ROLLUP is involved this lead to a wrong result due to value of a such field is got through a copy function and copying happens after the function evaluation. Such replacement isn't needed if grouping is also done by such a function. The change_group_ref() function now isn't called for a function present in the group list. --- mysql-test/r/olap.result | 15 +++++++++++++++ mysql-test/t/olap.test | 9 +++++++++ sql/sql_select.cc | 7 ++++++- 3 files changed, 30 insertions(+), 1 deletion(-) diff --git a/mysql-test/r/olap.result b/mysql-test/r/olap.result index fef990297d9..74b7570ea2a 100644 --- a/mysql-test/r/olap.result +++ b/mysql-test/r/olap.result @@ -541,3 +541,18 @@ a max(b) NULL 2 a 1 drop table t1; +create table t1 (a varchar(22) not null , b int); +insert into t1 values ("2006-07-01 21:30", 1), ("2006-07-01 23:30", 10); +select left(a,10), a, sum(b) from t1 group by 1,2 with rollup; +left(a,10) a sum(b) +2006-07-01 2006-07-01 21:30 1 +2006-07-01 2006-07-01 23:30 10 +2006-07-01 NULL 11 +NULL NULL 11 +select left(a,10) x, a, sum(b) from t1 group by x,a with rollup; +x a sum(b) +2006-07-01 2006-07-01 21:30 1 +2006-07-01 2006-07-01 23:30 10 +2006-07-01 NULL 11 +NULL NULL 11 +drop table t1; diff --git a/mysql-test/t/olap.test b/mysql-test/t/olap.test index 4f9790b0de6..683e1402678 100644 --- a/mysql-test/t/olap.test +++ b/mysql-test/t/olap.test @@ -272,4 +272,13 @@ select a, max(b) from t1 group by a with rollup; select distinct a, max(b) from t1 group by a with rollup; drop table t1; +# +# Bug #20825: rollup puts non-equal values together +# +create table t1 (a varchar(22) not null , b int); +insert into t1 values ("2006-07-01 21:30", 1), ("2006-07-01 23:30", 10); +select left(a,10), a, sum(b) from t1 group by 1,2 with rollup; +select left(a,10) x, a, sum(b) from t1 group by x,a with rollup; +drop table t1; + # End of 4.1 tests diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 74a9fc573c4..57b7fcceebe 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -9706,12 +9706,17 @@ bool JOIN::rollup_init() while ((item= it++)) { ORDER *group_tmp; + bool found_in_group= 0; + for (group_tmp= group_list; group_tmp; group_tmp= group_tmp->next) { if (*group_tmp->item == item) + { item->maybe_null= 1; + found_in_group= 1; + } } - if (item->type() == Item::FUNC_ITEM) + if (item->type() == Item::FUNC_ITEM && !found_in_group) { bool changed= FALSE; if (change_group_ref(thd, (Item_func *) item, group_list, &changed)) From 5f08a831863c28561b6b99f8f8246a3a24b2f2c2 Mon Sep 17 00:00:00 2001 From: "istruewing@chilla.local" <> Date: Mon, 9 Oct 2006 19:26:55 +0200 Subject: [PATCH 4/8] Bug#8283 - OPTIMIZE TABLE causes data loss OPTIMIZE TABLE with myisam_repair_threads > 1 performs a non-quick parallel repair. This means that it does not only rebuild all indexes, but also the data file. Non-quick parallel repair works so that there is one thread per index. The first of the threads rebuilds also the new data file. The problem was that all threads shared the read io cache on the old data file. If there were holes (deleted records) in the table, the first thread skipped them, writing only contiguous, non-deleted records to the new data file. Then it built the new index so that its entries pointed to the correct record positions. But the other threads didn't know the new record positions, but put the positions from the old data file into the index. The new design is so that there is a shared io cache which is filled by the first thread (the data file writer) with the new contiguous records and read by the other threads. Now they know the new record positions. Another problem was that for the parallel repair of compressed tables a common bit_buff and rec_buff was used. I changed it so that thread specific buffers are used for parallel repair. A similar problem existed for checksum calculation. I made this multi-thread safe too. --- include/my_sys.h | 22 +- include/myisam.h | 2 +- myisam/mi_check.c | 306 +++++++++++++++---- myisam/mi_open.c | 5 +- myisam/mi_packrec.c | 76 +++-- myisam/myisamdef.h | 30 +- myisam/sort.c | 126 ++++---- mysql-test/r/myisam.result | 126 ++++++++ mysql-test/t/myisam.test | 93 ++++++ mysys/mf_iocache.c | 584 +++++++++++++++++++++++++++++++------ 10 files changed, 1135 insertions(+), 235 deletions(-) diff --git a/include/my_sys.h b/include/my_sys.h index 02ea188a18e..92f3758a846 100644 --- a/include/my_sys.h +++ b/include/my_sys.h @@ -325,12 +325,18 @@ typedef int (*IO_CACHE_CALLBACK)(struct st_io_cache*); #ifdef THREAD typedef struct st_io_cache_share { - /* to sync on reads into buffer */ - pthread_mutex_t mutex; - pthread_cond_t cond; - int count, total; - /* actual IO_CACHE that filled the buffer */ - struct st_io_cache *active; + pthread_mutex_t mutex; /* To sync on reads into buffer. */ + pthread_cond_t cond; /* To wait for signals. */ + pthread_cond_t cond_writer; /* For a synchronized writer. */ + /* Offset in file corresponding to the first byte of buffer. */ + my_off_t pos_in_file; + /* If a synchronized write cache is the source of the data. */ + struct st_io_cache *source_cache; + byte *buffer; /* The read buffer. */ + byte *read_end; /* Behind last valid byte of buffer. */ + int running_threads; /* threads not in lock. */ + int total_threads; /* threads sharing the cache. */ + int error; /* Last error. */ #ifdef NOT_YET_IMPLEMENTED /* whether the structure should be free'd */ my_bool alloced; @@ -672,8 +678,8 @@ extern void setup_io_cache(IO_CACHE* info); extern int _my_b_read(IO_CACHE *info,byte *Buffer,uint Count); #ifdef THREAD extern int _my_b_read_r(IO_CACHE *info,byte *Buffer,uint Count); -extern void init_io_cache_share(IO_CACHE *info, - IO_CACHE_SHARE *s, uint num_threads); +extern void init_io_cache_share(IO_CACHE *read_cache, IO_CACHE_SHARE *cshare, + IO_CACHE *write_cache, uint num_threads); extern void remove_io_thread(IO_CACHE *info); #endif extern int _my_b_seq_read(IO_CACHE *info,byte *Buffer,uint Count); diff --git a/include/myisam.h b/include/myisam.h index c2d3d99a414..0a808070748 100644 --- a/include/myisam.h +++ b/include/myisam.h @@ -345,7 +345,7 @@ typedef struct st_mi_check_param uint testflag, key_cache_block_size; uint8 language; my_bool using_global_keycache, opt_lock_memory, opt_follow_links; - my_bool retry_repair, force_sort, calc_checksum; + my_bool retry_repair, force_sort; char temp_filename[FN_REFLEN],*isam_file_name; MY_TMPDIR *tmpdir; int tmpfile_createflag; diff --git a/myisam/mi_check.c b/myisam/mi_check.c index 2395640d5bf..35b56419367 100644 --- a/myisam/mi_check.c +++ b/myisam/mi_check.c @@ -16,6 +16,31 @@ /* Describe, check and repair of MyISAM tables */ +/* + About checksum calculation. + + There are two types of checksums. Table checksum and row checksum. + + Row checksum is an additional byte at the end of dynamic length + records. It must be calculated if the table is configured for them. + Otherwise they must not be used. The variable + MYISAM_SHARE::calc_checksum determines if row checksums are used. + MI_INFO::checksum is used as temporary storage during row handling. + For parallel repair we must assure that only one thread can use this + variable. There is no problem on the write side as this is done by one + thread only. But when checking a record after read this could go + wrong. But since all threads read through a common read buffer, it is + sufficient if only one thread checks it. + + Table checksum is an eight byte value in the header of the index file. + It can be calculated even if row checksums are not used. The variable + MI_CHECK::glob_crc is calculated over all records. + MI_SORT_PARAM::calc_checksum determines if this should be done. This + variable is not part of MI_CHECK because it must be set per thread for + parallel repair. The global glob_crc must be changed by one thread + only. And it is sufficient to calculate the checksum once only. +*/ + #include "ftdefs.h" #include #include @@ -41,8 +66,7 @@ static int chk_index(MI_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo, ha_checksum *key_checksum, uint level); static uint isam_key_length(MI_INFO *info,MI_KEYDEF *keyinfo); static ha_checksum calc_checksum(ha_rows count); -static int writekeys(MI_CHECK *param, MI_INFO *info,byte *buff, - my_off_t filepos); +static int writekeys(MI_SORT_PARAM *sort_param); static int sort_one_index(MI_CHECK *param, MI_INFO *info,MI_KEYDEF *keyinfo, my_off_t pagepos, File new_file); static int sort_key_read(MI_SORT_PARAM *sort_param,void *key); @@ -1101,7 +1125,8 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) goto err; start_recpos=pos; splits++; - VOID(_mi_pack_get_block_info(info,&block_info, -1, start_recpos)); + VOID(_mi_pack_get_block_info(info, &info->bit_buff, &block_info, + &info->rec_buff, -1, start_recpos)); pos=block_info.filepos+block_info.rec_len; if (block_info.rec_len < (uint) info->s->min_pack_length || block_info.rec_len > (uint) info->s->max_pack_length) @@ -1115,7 +1140,8 @@ int chk_data_link(MI_CHECK *param, MI_INFO *info,int extend) if (_mi_read_cache(¶m->read_cache,(byte*) info->rec_buff, block_info.filepos, block_info.rec_len, READING_NEXT)) goto err; - if (_mi_pack_rec_unpack(info,record,info->rec_buff,block_info.rec_len)) + if (_mi_pack_rec_unpack(info, &info->bit_buff, record, + info->rec_buff, block_info.rec_len)) { mi_check_print_error(param,"Found wrong record at %s", llstr(start_recpos,llbuff)); @@ -1394,7 +1420,7 @@ int mi_repair(MI_CHECK *param, register MI_INFO *info, info->state->empty=0; param->glob_crc=0; if (param->testflag & T_CALC_CHECKSUM) - param->calc_checksum=1; + sort_param.calc_checksum= 1; info->update= (short) (HA_STATE_CHANGED | HA_STATE_ROW_CHANGED); for (i=0 ; i < info->s->base.keys ; i++) @@ -1418,7 +1444,7 @@ int mi_repair(MI_CHECK *param, register MI_INFO *info, lock_memory(param); /* Everything is alloced */ while (!(error=sort_get_next_record(&sort_param))) { - if (writekeys(param,info,(byte*)sort_param.record,sort_param.filepos)) + if (writekeys(&sort_param)) { if (my_errno != HA_ERR_FOUND_DUPP_KEY) goto err; @@ -1563,11 +1589,13 @@ err: /* Uppate keyfile when doing repair */ -static int writekeys(MI_CHECK *param, register MI_INFO *info, byte *buff, - my_off_t filepos) +static int writekeys(MI_SORT_PARAM *sort_param) { register uint i; - uchar *key; + uchar *key; + MI_INFO *info= sort_param->sort_info->info; + byte *buff= sort_param->record; + my_off_t filepos= sort_param->filepos; DBUG_ENTER("writekeys"); key=info->lastkey+info->s->base.max_key_length; @@ -1621,8 +1649,8 @@ static int writekeys(MI_CHECK *param, register MI_INFO *info, byte *buff, } } /* Remove checksum that was added to glob_crc in sort_get_next_record */ - if (param->calc_checksum) - param->glob_crc-= info->checksum; + if (sort_param->calc_checksum) + sort_param->sort_info->param->glob_crc-= info->checksum; DBUG_PRINT("error",("errno: %d",my_errno)); DBUG_RETURN(-1); } /* writekeys */ @@ -2127,7 +2155,7 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, del=info->state->del; param->glob_crc=0; if (param->testflag & T_CALC_CHECKSUM) - param->calc_checksum=1; + sort_param.calc_checksum= 1; rec_per_key_part= param->rec_per_key_part; for (sort_param.key=0 ; sort_param.key < share->base.keys ; @@ -2189,7 +2217,8 @@ int mi_repair_by_sort(MI_CHECK *param, register MI_INFO *info, param->retry_repair=1; goto err; } - param->calc_checksum=0; /* No need to calc glob_crc */ + /* No need to calculate checksum again. */ + sort_param.calc_checksum= 0; /* Set for next loop */ sort_info.max_records= (ha_rows) info->state->records; @@ -2352,6 +2381,28 @@ err: Each key is handled by a separate thread. TODO: make a number of threads a parameter + In parallel repair we use one thread per index. There are two modes: + + Quick + + Only the indexes are rebuilt. All threads share a read buffer. + Every thread that needs fresh data in the buffer enters the shared + cache lock. The last thread joining the lock reads the buffer from + the data file and wakes all other threads. + + Non-quick + + The data file is rebuilt and all indexes are rebuilt to point to + the new record positions. One thread is the master thread. It + reads from the old data file and writes to the new data file. It + also creates one of the indexes. The other threads read from a + buffer which is filled by the master. If they need fresh data, + they enter the shared cache lock. If the masters write buffer is + full, it flushes it to the new data file and enters the shared + cache lock too. When all threads joined in the lock, the master + copies its write buffer to the read buffer for the other threads + and wakes them. + RESULT 0 ok <>0 Error @@ -2374,6 +2425,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, ulong *rec_per_key_part; HA_KEYSEG *keyseg; char llbuff[22]; + IO_CACHE new_data_cache; /* For non-quick repair. */ IO_CACHE_SHARE io_share; SORT_INFO sort_info; ulonglong key_map=share->state.key_map; @@ -2395,19 +2447,55 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, if (info->s->options & (HA_OPTION_CHECKSUM | HA_OPTION_COMPRESS_RECORD)) param->testflag|=T_CALC_CHECKSUM; + /* + Quick repair (not touching data file, rebuilding indexes): + { + Read cache is (MI_CHECK *param)->read_cache using info->dfile. + } + + Non-quick repair (rebuilding data file and indexes): + { + Master thread: + + Read cache is (MI_CHECK *param)->read_cache using info->dfile. + Write cache is (MI_INFO *info)->rec_cache using new_file. + + Slave threads: + + Read cache is new_data_cache synced to master rec_cache. + + The final assignment of the filedescriptor for rec_cache is done + after the cache creation. + + Don't check file size on new_data_cache, as the resulting file size + is not known yet. + + As rec_cache and new_data_cache are synced, write_buffer_length is + used for the read cache 'new_data_cache'. Both start at the same + position 'new_header_length'. + } + */ + DBUG_PRINT("info", ("is quick repair: %d", rep_quick)); bzero((char*)&sort_info,sizeof(sort_info)); + /* Initialize pthread structures before goto err. */ + pthread_mutex_init(&sort_info.mutex, MY_MUTEX_INIT_FAST); + pthread_cond_init(&sort_info.cond, 0); + if (!(sort_info.key_block= - alloc_key_blocks(param, - (uint) param->sort_key_blocks, - share->base.max_key_block_length)) - || init_io_cache(¶m->read_cache,info->dfile, - (uint) param->read_buffer_length, - READ_CACHE,share->pack.header_length,1,MYF(MY_WME)) || - (! rep_quick && - init_io_cache(&info->rec_cache,info->dfile, - (uint) param->write_buffer_length, - WRITE_CACHE,new_header_length,1, - MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw))) + alloc_key_blocks(param, (uint) param->sort_key_blocks, + share->base.max_key_block_length)) || + init_io_cache(¶m->read_cache, info->dfile, + (uint) param->read_buffer_length, + READ_CACHE, share->pack.header_length, 1, MYF(MY_WME)) || + (!rep_quick && + (init_io_cache(&info->rec_cache, info->dfile, + (uint) param->write_buffer_length, + WRITE_CACHE, new_header_length, 1, + MYF(MY_WME | MY_WAIT_IF_FULL) & param->myf_rw) || + init_io_cache(&new_data_cache, -1, + (uint) param->write_buffer_length, + READ_CACHE, new_header_length, 1, + MYF(MY_WME | MY_DONT_CHECK_FILESIZE))))) goto err; sort_info.key_block_end=sort_info.key_block+param->sort_key_blocks; info->opt_flag|=WRITE_CACHE_USED; @@ -2497,8 +2585,6 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, del=info->state->del; param->glob_crc=0; - if (param->testflag & T_CALC_CHECKSUM) - param->calc_checksum=1; if (!(sort_param=(MI_SORT_PARAM *) my_malloc((uint) share->base.keys * @@ -2548,6 +2634,7 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, sort_param[i].sort_info=&sort_info; sort_param[i].master=0; sort_param[i].fix_datafile=0; + sort_param[i].calc_checksum= 0; sort_param[i].filepos=new_header_length; sort_param[i].max_pos=sort_param[i].pos=share->pack.header_length; @@ -2584,19 +2671,45 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, sort_info.total_keys=i; sort_param[0].master= 1; sort_param[0].fix_datafile= (my_bool)(! rep_quick); + sort_param[0].calc_checksum= test(param->testflag & T_CALC_CHECKSUM); sort_info.got_error=0; - pthread_mutex_init(&sort_info.mutex, MY_MUTEX_INIT_FAST); - pthread_cond_init(&sort_info.cond, 0); pthread_mutex_lock(&sort_info.mutex); - init_io_cache_share(¶m->read_cache, &io_share, i); + /* + Initialize the I/O cache share for use with the read caches and, in + case of non-quick repair, the write cache. When all threads join on + the cache lock, the writer copies the write cache contents to the + read caches. + */ + if (i > 1) + { + if (rep_quick) + init_io_cache_share(¶m->read_cache, &io_share, NULL, i); + else + init_io_cache_share(&new_data_cache, &io_share, &info->rec_cache, i); + } + else + io_share.total_threads= 0; /* share not used */ + (void) pthread_attr_init(&thr_attr); (void) pthread_attr_setdetachstate(&thr_attr,PTHREAD_CREATE_DETACHED); for (i=0 ; i < sort_info.total_keys ; i++) { - sort_param[i].read_cache=param->read_cache; + /* + Copy the properly initialized IO_CACHE structure so that every + thread has its own copy. In quick mode param->read_cache is shared + for use by all threads. In non-quick mode all threads but the + first copy the shared new_data_cache, which is synchronized to the + write cache of the first thread. The first thread copies + param->read_cache, which is not shared. + */ + sort_param[i].read_cache= ((rep_quick || !i) ? param->read_cache : + new_data_cache); + DBUG_PRINT("io_cache_share", ("thread: %u read_cache: 0x%lx", + i, (long) &sort_param[i].read_cache)); + /* two approaches: the same amount of memory for each thread or the memory for the same number of keys for each thread... @@ -2614,7 +2727,10 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, (void *) (sort_param+i))) { mi_check_print_error(param,"Cannot start a repair thread"); - remove_io_thread(¶m->read_cache); + /* Cleanup: Detach from the share. Avoid others to be blocked. */ + if (io_share.total_threads) + remove_io_thread(&sort_param[i].read_cache); + DBUG_PRINT("error", ("Cannot start a repair thread")); sort_info.got_error=1; } else @@ -2636,6 +2752,11 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, if (sort_param[0].fix_datafile) { + /* + Append some nuls to the end of a memory mapped file. Destroy the + write cache. The master thread did already detach from the share + by remove_io_thread() in sort.c:thr_find_all_keys(). + */ if (write_data_suffix(&sort_info,1) || end_io_cache(&info->rec_cache)) goto err; if (param->testflag & T_SAFE_REPAIR) @@ -2651,8 +2772,14 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, sort_param->filepos; /* Only whole records */ share->state.version=(ulong) time((time_t*) 0); + + /* + Exchange the data file descriptor of the table, so that we use the + new file from now on. + */ my_close(info->dfile,MYF(0)); info->dfile=new_file; + share->data_file_type=sort_info.new_data_file_type; share->pack.header_length=(ulong) new_header_length; } @@ -2707,7 +2834,20 @@ int mi_repair_parallel(MI_CHECK *param, register MI_INFO *info, err: got_error|= flush_blocks(param, share->key_cache, share->kfile); + /* + Destroy the write cache. The master thread did already detach from + the share by remove_io_thread() or it was not yet started (if the + error happend before creating the thread). + */ VOID(end_io_cache(&info->rec_cache)); + /* + Destroy the new data cache in case of non-quick repair. All slave + threads did either detach from the share by remove_io_thread() + already or they were not yet started (if the error happend before + creating the threads). + */ + if (!rep_quick) + VOID(end_io_cache(&new_data_cache)); if (!got_error) { /* Replace the actual file with the temporary file */ @@ -2838,12 +2978,41 @@ static int sort_ft_key_read(MI_SORT_PARAM *sort_param, void *key) } /* sort_ft_key_read */ - /* Read next record from file using parameters in sort_info */ - /* Return -1 if end of file, 0 if ok and > 0 if error */ +/* + Read next record from file using parameters in sort_info. + + SYNOPSIS + sort_get_next_record() + sort_param Information about and for the sort process + + NOTE + + Dynamic Records With Non-Quick Parallel Repair + + For non-quick parallel repair we use a synchronized read/write + cache. This means that one thread is the master who fixes the data + file by reading each record from the old data file and writing it + to the new data file. By doing this the records in the new data + file are written contiguously. Whenever the write buffer is full, + it is copied to the read buffer. The slaves read from the read + buffer, which is not associated with a file. Thus read_cache.file + is -1. When using _mi_read_cache(), the slaves must always set + flag to READING_NEXT so that the function never tries to read from + file. This is safe because the records are contiguous. There is no + need to read outside the cache. This condition is evaluated in the + variable 'parallel_flag' for quick reference. read_cache.file must + be >= 0 in every other case. + + RETURN + -1 end of file + 0 ok + > 0 error +*/ static int sort_get_next_record(MI_SORT_PARAM *sort_param) { int searching; + int parallel_flag; uint found_record,b_type,left_length; my_off_t pos; byte *to; @@ -2881,7 +3050,7 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) sort_param->max_pos=(sort_param->pos+=share->base.pack_reclength); if (*sort_param->record) { - if (param->calc_checksum) + if (sort_param->calc_checksum) param->glob_crc+= (info->checksum= mi_static_checksum(info,sort_param->record)); DBUG_RETURN(0); @@ -2896,6 +3065,7 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) LINT_INIT(to); pos=sort_param->pos; searching=(sort_param->fix_datafile && (param->testflag & T_EXTEND)); + parallel_flag= (sort_param->read_cache.file < 0) ? READING_NEXT : 0; for (;;) { found_record=block_info.second_read= 0; @@ -2926,7 +3096,7 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) (byte*) block_info.header,pos, MI_BLOCK_INFO_HEADER_LENGTH, (! found_record ? READING_NEXT : 0) | - READING_HEADER)) + parallel_flag | READING_HEADER)) { if (found_record) { @@ -3103,9 +3273,31 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) llstr(sort_param->start_recpos,llbuff)); goto try_next; } - if (_mi_read_cache(&sort_param->read_cache,to,block_info.filepos, - block_info.data_len, - (found_record == 1 ? READING_NEXT : 0))) + /* + Copy information that is already read. Avoid accessing data + below the cache start. This could happen if the header + streched over the end of the previous buffer contents. + */ + { + uint header_len= (uint) (block_info.filepos - pos); + uint prefetch_len= (MI_BLOCK_INFO_HEADER_LENGTH - header_len); + + if (prefetch_len > block_info.data_len) + prefetch_len= block_info.data_len; + if (prefetch_len) + { + memcpy(to, block_info.header + header_len, prefetch_len); + block_info.filepos+= prefetch_len; + block_info.data_len-= prefetch_len; + left_length-= prefetch_len; + to+= prefetch_len; + } + } + if (block_info.data_len && + _mi_read_cache(&sort_param->read_cache,to,block_info.filepos, + block_info.data_len, + (found_record == 1 ? READING_NEXT : 0) | + parallel_flag)) { mi_check_print_info(param, "Read error for block at: %s (error: %d); Skipped", @@ -3135,13 +3327,14 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) { if (sort_param->read_cache.error < 0) DBUG_RETURN(1); - if (info->s->calc_checksum) - info->checksum=mi_checksum(info,sort_param->record); + if (sort_param->calc_checksum) + info->checksum= mi_checksum(info, sort_param->record); if ((param->testflag & (T_EXTEND | T_REP)) || searching) { if (_mi_rec_check(info, sort_param->record, sort_param->rec_buff, sort_param->find_length, (param->testflag & T_QUICK) && + sort_param->calc_checksum && test(info->s->calc_checksum))) { mi_check_print_info(param,"Found wrong packed record at %s", @@ -3149,7 +3342,7 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) goto try_next; } } - if (param->calc_checksum) + if (sort_param->calc_checksum) param->glob_crc+= info->checksum; DBUG_RETURN(0); } @@ -3176,7 +3369,8 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) DBUG_RETURN(1); /* Something wrong with data */ } sort_param->start_recpos=sort_param->pos; - if (_mi_pack_get_block_info(info,&block_info,-1,sort_param->pos)) + if (_mi_pack_get_block_info(info, &sort_param->bit_buff, &block_info, + &sort_param->rec_buff, -1, sort_param->pos)) DBUG_RETURN(-1); if (!block_info.rec_len && sort_param->pos + MEMMAP_EXTRA_MARGIN == @@ -3200,15 +3394,14 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) llstr(sort_param->pos,llbuff)); continue; } - if (_mi_pack_rec_unpack(info,sort_param->record,sort_param->rec_buff, - block_info.rec_len)) + if (_mi_pack_rec_unpack(info, &sort_param->bit_buff, sort_param->record, + sort_param->rec_buff, block_info.rec_len)) { if (! searching) mi_check_print_info(param,"Found wrong record at %s", llstr(sort_param->pos,llbuff)); continue; } - info->checksum=mi_checksum(info,sort_param->record); if (!sort_param->fix_datafile) { sort_param->filepos=sort_param->pos; @@ -3218,8 +3411,9 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) sort_param->max_pos=(sort_param->pos=block_info.filepos+ block_info.rec_len); info->packed_length=block_info.rec_len; - if (param->calc_checksum) - param->glob_crc+= info->checksum; + if (sort_param->calc_checksum) + param->glob_crc+= (info->checksum= + mi_checksum(info, sort_param->record)); DBUG_RETURN(0); } } @@ -3227,7 +3421,20 @@ static int sort_get_next_record(MI_SORT_PARAM *sort_param) } - /* Write record to new file */ +/* + Write record to new file. + + SYNOPSIS + sort_write_record() + sort_param Sort parameters. + + NOTE + This is only called by a master thread if parallel repair is used. + + RETURN + 0 OK + 1 Error +*/ int sort_write_record(MI_SORT_PARAM *sort_param) { @@ -3276,6 +3483,7 @@ int sort_write_record(MI_SORT_PARAM *sort_param) } from=sort_info->buff+ALIGN_SIZE(MI_MAX_DYN_BLOCK_HEADER); } + /* We can use info->checksum here as only one thread calls this. */ info->checksum=mi_checksum(info,sort_param->record); reclength=_mi_rec_pack(info,from,sort_param->record); flag=0; @@ -3685,7 +3893,7 @@ static int sort_delete_record(MI_SORT_PARAM *sort_param) DBUG_RETURN(1); } } - if (param->calc_checksum) + if (sort_param->calc_checksum) param->glob_crc-=(*info->s->calc_checksum)(info, sort_param->record); } error=flush_io_cache(&info->rec_cache) || (*info->s->delete_record)(info); diff --git a/myisam/mi_open.c b/myisam/mi_open.c index a5b303f86d4..4f298397615 100644 --- a/myisam/mi_open.c +++ b/myisam/mi_open.c @@ -201,7 +201,10 @@ MI_INFO *mi_open(const char *name, int mode, uint open_flags) ((open_flags & HA_OPEN_ABORT_IF_CRASHED) && (my_disable_locking && share->state.open_count)))) { - DBUG_PRINT("error",("Table is marked as crashed")); + DBUG_PRINT("error",("Table is marked as crashed. open_flags: %u " + "changed: %u open_count: %u !locking: %d", + open_flags, share->state.changed, + share->state.open_count, my_disable_locking)); my_errno=((share->state.changed & STATE_CRASHED_ON_REPAIR) ? HA_ERR_CRASHED_ON_REPAIR : HA_ERR_CRASHED_ON_USAGE); goto err; diff --git a/myisam/mi_packrec.c b/myisam/mi_packrec.c index bd2d162d100..c425bf54b1a 100644 --- a/myisam/mi_packrec.c +++ b/myisam/mi_packrec.c @@ -101,7 +101,8 @@ static uint fill_and_get_bits(MI_BIT_BUFF *bit_buff,uint count); static void fill_buffer(MI_BIT_BUFF *bit_buff); static uint max_bit(uint value); #ifdef HAVE_MMAP -static uchar *_mi_mempack_get_block_info(MI_INFO *myisam,MI_BLOCK_INFO *info, +static uchar *_mi_mempack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff, + MI_BLOCK_INFO *info, byte **rec_buff_p, uchar *header); #endif @@ -428,13 +429,15 @@ int _mi_read_pack_record(MI_INFO *info, my_off_t filepos, byte *buf) DBUG_RETURN(-1); /* _search() didn't find record */ file=info->dfile; - if (_mi_pack_get_block_info(info, &block_info, file, filepos)) + if (_mi_pack_get_block_info(info, &info->bit_buff, &block_info, + &info->rec_buff, file, filepos)) goto err; if (my_read(file,(byte*) info->rec_buff + block_info.offset , block_info.rec_len - block_info.offset, MYF(MY_NABP))) goto panic; info->update|= HA_STATE_AKTIV; - DBUG_RETURN(_mi_pack_rec_unpack(info,buf,info->rec_buff,block_info.rec_len)); + DBUG_RETURN(_mi_pack_rec_unpack(info, &info->bit_buff, buf, + info->rec_buff, block_info.rec_len)); panic: my_errno=HA_ERR_WRONG_IN_RECORD; err: @@ -443,8 +446,8 @@ err: -int _mi_pack_rec_unpack(register MI_INFO *info, register byte *to, byte *from, - ulong reclength) +int _mi_pack_rec_unpack(register MI_INFO *info, MI_BIT_BUFF *bit_buff, + register byte *to, byte *from, ulong reclength) { byte *end_field; reg3 MI_COLUMNDEF *end; @@ -452,18 +455,18 @@ int _mi_pack_rec_unpack(register MI_INFO *info, register byte *to, byte *from, MYISAM_SHARE *share=info->s; DBUG_ENTER("_mi_pack_rec_unpack"); - init_bit_buffer(&info->bit_buff, (uchar*) from,reclength); + init_bit_buffer(bit_buff, (uchar*) from, reclength); for (current_field=share->rec, end=current_field+share->base.fields ; current_field < end ; current_field++,to=end_field) { end_field=to+current_field->length; - (*current_field->unpack)(current_field,&info->bit_buff,(uchar*) to, + (*current_field->unpack)(current_field, bit_buff, (uchar*) to, (uchar*) end_field); } - if (! info->bit_buff.error && - info->bit_buff.pos - info->bit_buff.bits/8 == info->bit_buff.end) + if (!bit_buff->error && + bit_buff->pos - bit_buff->bits / 8 == bit_buff->end) DBUG_RETURN(0); info->update&= ~HA_STATE_AKTIV; DBUG_RETURN(my_errno=HA_ERR_WRONG_IN_RECORD); @@ -977,13 +980,16 @@ int _mi_read_rnd_pack_record(MI_INFO *info, byte *buf, if (info->opt_flag & READ_CACHE_USED) { - if (_mi_read_cache(&info->rec_cache,(byte*) block_info.header,filepos, - share->pack.ref_length, skip_deleted_blocks)) + if (_mi_read_cache(&info->rec_cache, (byte*) block_info.header, + filepos, share->pack.ref_length, + skip_deleted_blocks ? READING_NEXT : 0)) goto err; - b_type=_mi_pack_get_block_info(info,&block_info,-1, filepos); + b_type=_mi_pack_get_block_info(info, &info->bit_buff, &block_info, + &info->rec_buff, -1, filepos); } else - b_type=_mi_pack_get_block_info(info,&block_info,info->dfile,filepos); + b_type=_mi_pack_get_block_info(info, &info->bit_buff, &block_info, + &info->rec_buff, info->dfile, filepos); if (b_type) goto err; /* Error code is already set */ #ifndef DBUG_OFF @@ -996,9 +1002,9 @@ int _mi_read_rnd_pack_record(MI_INFO *info, byte *buf, if (info->opt_flag & READ_CACHE_USED) { - if (_mi_read_cache(&info->rec_cache,(byte*) info->rec_buff, - block_info.filepos, block_info.rec_len, - skip_deleted_blocks)) + if (_mi_read_cache(&info->rec_cache, (byte*) info->rec_buff, + block_info.filepos, block_info.rec_len, + skip_deleted_blocks ? READING_NEXT : 0)) goto err; } else @@ -1013,8 +1019,8 @@ int _mi_read_rnd_pack_record(MI_INFO *info, byte *buf, info->nextpos=block_info.filepos+block_info.rec_len; info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; - DBUG_RETURN (_mi_pack_rec_unpack(info,buf,info->rec_buff, - block_info.rec_len)); + DBUG_RETURN (_mi_pack_rec_unpack(info, &info->bit_buff, buf, + info->rec_buff, block_info.rec_len)); err: DBUG_RETURN(my_errno); } @@ -1022,8 +1028,9 @@ int _mi_read_rnd_pack_record(MI_INFO *info, byte *buf, /* Read and process header from a huff-record-file */ -uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BLOCK_INFO *info, File file, - my_off_t filepos) +uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff, + MI_BLOCK_INFO *info, byte **rec_buff_p, + File file, my_off_t filepos) { uchar *header=info->header; uint head_length,ref_length; @@ -1048,17 +1055,17 @@ uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BLOCK_INFO *info, File file, head_length+= read_pack_length((uint) myisam->s->pack.version, header + head_length, &info->blob_len); if (!(mi_alloc_rec_buff(myisam,info->rec_len + info->blob_len, - &myisam->rec_buff))) + rec_buff_p))) return BLOCK_FATAL_ERROR; /* not enough memory */ - myisam->bit_buff.blob_pos=(uchar*) myisam->rec_buff+info->rec_len; - myisam->bit_buff.blob_end= myisam->bit_buff.blob_pos+info->blob_len; + bit_buff->blob_pos= (uchar*) *rec_buff_p + info->rec_len; + bit_buff->blob_end= bit_buff->blob_pos + info->blob_len; myisam->blob_length=info->blob_len; } info->filepos=filepos+head_length; if (file > 0) { info->offset=min(info->rec_len, ref_length - head_length); - memcpy(myisam->rec_buff, header+head_length, info->offset); + memcpy(*rec_buff_p, header + head_length, info->offset); } return 0; } @@ -1198,7 +1205,8 @@ void _mi_unmap_file(MI_INFO *info) } -static uchar *_mi_mempack_get_block_info(MI_INFO *myisam,MI_BLOCK_INFO *info, +static uchar *_mi_mempack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff, + MI_BLOCK_INFO *info, byte **rec_buff_p, uchar *header) { header+= read_pack_length((uint) myisam->s->pack.version, header, @@ -1209,10 +1217,10 @@ static uchar *_mi_mempack_get_block_info(MI_INFO *myisam,MI_BLOCK_INFO *info, &info->blob_len); /* mi_alloc_rec_buff sets my_errno on error */ if (!(mi_alloc_rec_buff(myisam, info->blob_len, - &myisam->rec_buff))) + rec_buff_p))) return 0; /* not enough memory */ - myisam->bit_buff.blob_pos=(uchar*) myisam->rec_buff; - myisam->bit_buff.blob_end= (uchar*) myisam->rec_buff + info->blob_len; + bit_buff->blob_pos= (uchar*) *rec_buff_p; + bit_buff->blob_end= (uchar*) *rec_buff_p + info->blob_len; } return header; } @@ -1228,11 +1236,13 @@ static int _mi_read_mempack_record(MI_INFO *info, my_off_t filepos, byte *buf) if (filepos == HA_OFFSET_ERROR) DBUG_RETURN(-1); /* _search() didn't find record */ - if (!(pos= (byte*) _mi_mempack_get_block_info(info,&block_info, + if (!(pos= (byte*) _mi_mempack_get_block_info(info, &info->bit_buff, + &block_info, &info->rec_buff, (uchar*) share->file_map+ filepos))) DBUG_RETURN(-1); - DBUG_RETURN(_mi_pack_rec_unpack(info, buf, pos, block_info.rec_len)); + DBUG_RETURN(_mi_pack_rec_unpack(info, &info->bit_buff, buf, + pos, block_info.rec_len)); } @@ -1252,7 +1262,8 @@ static int _mi_read_rnd_mempack_record(MI_INFO *info, byte *buf, my_errno=HA_ERR_END_OF_FILE; goto err; } - if (!(pos= (byte*) _mi_mempack_get_block_info(info,&block_info, + if (!(pos= (byte*) _mi_mempack_get_block_info(info, &info->bit_buff, + &block_info, &info->rec_buff, (uchar*) (start=share->file_map+ filepos)))) @@ -1269,7 +1280,8 @@ static int _mi_read_rnd_mempack_record(MI_INFO *info, byte *buf, info->nextpos=filepos+(uint) (pos-start)+block_info.rec_len; info->update|= HA_STATE_AKTIV | HA_STATE_KEY_CHANGED; - DBUG_RETURN (_mi_pack_rec_unpack(info,buf,pos, block_info.rec_len)); + DBUG_RETURN (_mi_pack_rec_unpack(info, &info->bit_buff, buf, + pos, block_info.rec_len)); err: DBUG_RETURN(my_errno); } diff --git a/myisam/myisamdef.h b/myisam/myisamdef.h index a766d59d72a..6365f0e1b0c 100644 --- a/myisam/myisamdef.h +++ b/myisam/myisamdef.h @@ -75,7 +75,7 @@ typedef struct st_mi_state_info ulong sec_index_changed; /* Updated when new sec_index */ ulong sec_index_used; /* which extra index are in use */ ulonglong key_map; /* Which keys are in use */ - ha_checksum checksum; + ha_checksum checksum; /* Table checksum */ ulong version; /* timestamp of create */ time_t create_time; /* Time when created database */ time_t recover_time; /* Time for last recover */ @@ -176,6 +176,7 @@ typedef struct st_mi_isam_share { /* Shared between opens */ int (*delete_record)(struct st_myisam_info*); int (*read_rnd)(struct st_myisam_info*, byte*, my_off_t, my_bool); int (*compare_record)(struct st_myisam_info*, const byte *); + /* Function to use for a row checksum. */ ha_checksum (*calc_checksum)(struct st_myisam_info*, const byte *); int (*compare_unique)(struct st_myisam_info*, MI_UNIQUEDEF *, const byte *record, my_off_t pos); @@ -249,7 +250,7 @@ struct st_myisam_info { my_off_t last_keypage; /* Last key page read */ my_off_t last_search_keypage; /* Last keypage when searching */ my_off_t dupp_key_pos; - ha_checksum checksum; + ha_checksum checksum; /* Temp storage for row checksum */ /* QQ: the folloing two xxx_length fields should be removed, as they are not compatible with parallel repair */ ulong packed_length,blob_length; /* Length of found, packed record */ @@ -297,8 +298,9 @@ typedef struct st_mi_sort_param pthread_t thr; IO_CACHE read_cache, tempfile, tempfile_for_exceptions; DYNAMIC_ARRAY buffpek; - - /* + MI_BIT_BUFF bit_buff; /* For parallel repair of packrec. */ + + /* The next two are used to collect statistics, see update_key_parts for description. */ @@ -309,6 +311,7 @@ typedef struct st_mi_sort_param uint key, key_length,real_key_length,sortbuff_size; uint maxbuffers, keys, find_length, sort_keys_length; my_bool fix_datafile, master; + my_bool calc_checksum; /* calculate table checksum */ MI_KEYDEF *keyinfo; HA_KEYSEG *seg; SORT_INFO *sort_info; @@ -361,8 +364,15 @@ typedef struct st_mi_sort_param #define mi_putint(x,y,nod) { uint16 boh=(nod ? (uint16) 32768 : 0) + (uint16) (y);\ mi_int2store(x,boh); } #define mi_test_if_nod(x) (x[0] & 128 ? info->s->base.key_reflength : 0) -#define mi_mark_crashed(x) (x)->s->state.changed|=STATE_CRASHED -#define mi_mark_crashed_on_repair(x) { (x)->s->state.changed|=STATE_CRASHED|STATE_CRASHED_ON_REPAIR ; (x)->update|= HA_STATE_CHANGED; } +#define mi_mark_crashed(x) do{(x)->s->state.changed|= STATE_CRASHED; \ + DBUG_PRINT("error", ("Marked table crashed")); \ + }while(0) +#define mi_mark_crashed_on_repair(x) do{(x)->s->state.changed|= \ + STATE_CRASHED|STATE_CRASHED_ON_REPAIR; \ + (x)->update|= HA_STATE_CHANGED; \ + DBUG_PRINT("error", \ + ("Marked table crashed")); \ + }while(0) #define mi_is_crashed(x) ((x)->s->state.changed & STATE_CRASHED) #define mi_is_crashed_on_repair(x) ((x)->s->state.changed & STATE_CRASHED_ON_REPAIR) @@ -600,8 +610,8 @@ extern void _mi_print_key(FILE *stream,HA_KEYSEG *keyseg,const uchar *key, extern my_bool _mi_read_pack_info(MI_INFO *info,pbool fix_keys); extern int _mi_read_pack_record(MI_INFO *info,my_off_t filepos,byte *buf); extern int _mi_read_rnd_pack_record(MI_INFO*, byte *,my_off_t, my_bool); -extern int _mi_pack_rec_unpack(MI_INFO *info,byte *to,byte *from, - ulong reclength); +extern int _mi_pack_rec_unpack(MI_INFO *info, MI_BIT_BUFF *bit_buff, + byte *to, byte *from, ulong reclength); extern ulonglong mi_safe_mul(ulonglong a,ulonglong b); extern int _mi_ft_update(MI_INFO *info, uint keynr, byte *keybuf, const byte *oldrec, const byte *newrec, my_off_t pos); @@ -666,7 +676,9 @@ extern "C" { extern uint _mi_get_block_info(MI_BLOCK_INFO *,File, my_off_t); extern uint _mi_rec_pack(MI_INFO *info,byte *to,const byte *from); -extern uint _mi_pack_get_block_info(MI_INFO *, MI_BLOCK_INFO *, File, my_off_t); +extern uint _mi_pack_get_block_info(MI_INFO *myisam, MI_BIT_BUFF *bit_buff, + MI_BLOCK_INFO *info, byte **rec_buff_p, + File file, my_off_t filepos); extern void _my_store_blob_length(byte *pos,uint pack_length,uint length); extern void _myisam_log(enum myisam_log_commands command,MI_INFO *info, const byte *buffert,uint length); diff --git a/myisam/sort.c b/myisam/sort.c index 96b55d599c8..2e6fd20ade8 100644 --- a/myisam/sort.c +++ b/myisam/sort.c @@ -309,7 +309,7 @@ static ha_rows NEAR_F find_all_keys(MI_SORT_PARAM *info, uint keys, pthread_handler_decl(thr_find_all_keys,arg) { - MI_SORT_PARAM *info= (MI_SORT_PARAM*) arg; + MI_SORT_PARAM *sort_param= (MI_SORT_PARAM*) arg; int error; uint memavl,old_memavl,keys,sort_length; uint idx, maxbuffer; @@ -321,32 +321,34 @@ pthread_handler_decl(thr_find_all_keys,arg) if (my_thread_init()) goto err; - if (info->sort_info->got_error) + DBUG_ENTER("thr_find_all_keys"); + DBUG_PRINT("enter", ("master: %d", sort_param->master)); + if (sort_param->sort_info->got_error) goto err; - if (info->keyinfo->flag && HA_VAR_LENGTH_KEY) + if (sort_param->keyinfo->flag && HA_VAR_LENGTH_KEY) { - info->write_keys=write_keys_varlen; - info->read_to_buffer=read_to_buffer_varlen; - info->write_key=write_merge_key_varlen; + sort_param->write_keys= write_keys_varlen; + sort_param->read_to_buffer= read_to_buffer_varlen; + sort_param->write_key= write_merge_key_varlen; } else { - info->write_keys=write_keys; - info->read_to_buffer=read_to_buffer; - info->write_key=write_merge_key; + sort_param->write_keys= write_keys; + sort_param->read_to_buffer= read_to_buffer; + sort_param->write_key= write_merge_key; } - my_b_clear(&info->tempfile); - my_b_clear(&info->tempfile_for_exceptions); - bzero((char*) &info->buffpek,sizeof(info->buffpek)); - bzero((char*) &info->unique, sizeof(info->unique)); + my_b_clear(&sort_param->tempfile); + my_b_clear(&sort_param->tempfile_for_exceptions); + bzero((char*) &sort_param->buffpek, sizeof(sort_param->buffpek)); + bzero((char*) &sort_param->unique, sizeof(sort_param->unique)); sort_keys= (uchar **) NULL; - memavl=max(info->sortbuff_size, MIN_SORT_MEMORY); - idx= info->sort_info->max_records; - sort_length= info->key_length; - maxbuffer= 1; + memavl= max(sort_param->sortbuff_size, MIN_SORT_MEMORY); + idx= sort_param->sort_info->max_records; + sort_length= sort_param->key_length; + maxbuffer= 1; while (memavl >= MIN_SORT_MEMORY) { @@ -363,18 +365,19 @@ pthread_handler_decl(thr_find_all_keys,arg) (keys=(memavl-sizeof(BUFFPEK)*maxbuffer)/ (sort_length+sizeof(char*))) <= 1) { - mi_check_print_error(info->sort_info->param, + mi_check_print_error(sort_param->sort_info->param, "sort_buffer_size is to small"); goto err; } } while ((maxbuffer= (int) (idx/(keys-1)+1)) != skr); } - if ((sort_keys=(uchar **)my_malloc(keys*(sort_length+sizeof(char*))+ - ((info->keyinfo->flag & HA_FULLTEXT) ? - HA_FT_MAXBYTELEN : 0), MYF(0)))) + if ((sort_keys= (uchar**) + my_malloc(keys*(sort_length+sizeof(char*))+ + ((sort_param->keyinfo->flag & HA_FULLTEXT) ? + HA_FT_MAXBYTELEN : 0), MYF(0)))) { - if (my_init_dynamic_array(&info->buffpek, sizeof(BUFFPEK), + if (my_init_dynamic_array(&sort_param->buffpek, sizeof(BUFFPEK), maxbuffer, maxbuffer/2)) my_free((gptr) sort_keys,MYF(0)); else @@ -386,69 +389,87 @@ pthread_handler_decl(thr_find_all_keys,arg) } if (memavl < MIN_SORT_MEMORY) { - mi_check_print_error(info->sort_info->param,"Sort buffer to small"); /* purecov: tested */ + mi_check_print_error(sort_param->sort_info->param, "Sort buffer too small"); goto err; /* purecov: tested */ } - if (info->sort_info->param->testflag & T_VERBOSE) - printf("Key %d - Allocating buffer for %d keys\n",info->key+1,keys); - info->sort_keys=sort_keys; + if (sort_param->sort_info->param->testflag & T_VERBOSE) + printf("Key %d - Allocating buffer for %d keys\n", + sort_param->key + 1, keys); + sort_param->sort_keys= sort_keys; idx=error=0; sort_keys[0]=(uchar*) (sort_keys+keys); - while (!(error=info->sort_info->got_error) && - !(error=(*info->key_read)(info,sort_keys[idx]))) + DBUG_PRINT("info", ("reading keys")); + while (!(error= sort_param->sort_info->got_error) && + !(error= (*sort_param->key_read)(sort_param, sort_keys[idx]))) { - if (info->real_key_length > info->key_length) + if (sort_param->real_key_length > sort_param->key_length) { - if (write_key(info,sort_keys[idx], &info->tempfile_for_exceptions)) + if (write_key(sort_param, sort_keys[idx], + &sort_param->tempfile_for_exceptions)) goto err; continue; } if (++idx == keys) { - if (info->write_keys(info,sort_keys,idx-1, - (BUFFPEK *)alloc_dynamic(&info->buffpek), - &info->tempfile)) + if (sort_param->write_keys(sort_param, sort_keys, idx - 1, + (BUFFPEK*) alloc_dynamic(&sort_param->buffpek), + &sort_param->tempfile)) goto err; sort_keys[0]=(uchar*) (sort_keys+keys); - memcpy(sort_keys[0],sort_keys[idx-1],(size_t) info->key_length); + memcpy(sort_keys[0], sort_keys[idx - 1], (size_t) sort_param->key_length); idx=1; } - sort_keys[idx]=sort_keys[idx-1]+info->key_length; + sort_keys[idx]= sort_keys[idx - 1] + sort_param->key_length; } if (error > 0) goto err; - if (info->buffpek.elements) + if (sort_param->buffpek.elements) { - if (info->write_keys(info,sort_keys, idx, - (BUFFPEK *) alloc_dynamic(&info->buffpek), &info->tempfile)) + if (sort_param->write_keys(sort_param, sort_keys, idx, + (BUFFPEK*) alloc_dynamic(&sort_param->buffpek), + &sort_param->tempfile)) goto err; - info->keys=(info->buffpek.elements-1)*(keys-1)+idx; + sort_param->keys= (sort_param->buffpek.elements - 1) * (keys - 1) + idx; } else - info->keys=idx; + sort_param->keys= idx; - info->sort_keys_length=keys; + sort_param->sort_keys_length= keys; goto ok; err: - info->sort_info->got_error=1; /* no need to protect this with a mutex */ + DBUG_PRINT("error", ("got some error")); + sort_param->sort_info->got_error= 1; /* no need to protect with a mutex */ if (sort_keys) my_free((gptr) sort_keys,MYF(0)); - info->sort_keys=0; - delete_dynamic(& info->buffpek); - close_cached_file(&info->tempfile); - close_cached_file(&info->tempfile_for_exceptions); + sort_param->sort_keys= 0; + delete_dynamic(& sort_param->buffpek); + close_cached_file(&sort_param->tempfile); + close_cached_file(&sort_param->tempfile_for_exceptions); ok: - remove_io_thread(&info->read_cache); - pthread_mutex_lock(&info->sort_info->mutex); - info->sort_info->threads_running--; - pthread_cond_signal(&info->sort_info->cond); - pthread_mutex_unlock(&info->sort_info->mutex); + /* + Detach from the share if the writer is involved. Avoid others to + be blocked. This includes a flush of the write buffer. This will + also indicate EOF to the readers. + */ + if (sort_param->sort_info->info->rec_cache.share) + remove_io_thread(&sort_param->sort_info->info->rec_cache); + + /* Readers detach from the share if any. Avoid others to be blocked. */ + if (sort_param->read_cache.share) + remove_io_thread(&sort_param->read_cache); + + pthread_mutex_lock(&sort_param->sort_info->mutex); + if (!--sort_param->sort_info->threads_running) + pthread_cond_signal(&sort_param->sort_info->cond); + pthread_mutex_unlock(&sort_param->sort_info->mutex); + + DBUG_PRINT("exit", ("======== ending thread ========")); my_thread_end(); return NULL; } @@ -466,6 +487,7 @@ int thr_write_keys(MI_SORT_PARAM *sort_param) MYISAM_SHARE *share=info->s; MI_SORT_PARAM *sinfo; byte *mergebuf=0; + DBUG_ENTER("thr_write_keys"); LINT_INIT(length); for (i= 0, sinfo= sort_param ; @@ -604,7 +626,7 @@ int thr_write_keys(MI_SORT_PARAM *sort_param) } } my_free((gptr) mergebuf,MYF(MY_ALLOW_ZERO_PTR)); - return got_error; + DBUG_RETURN(got_error); } #endif /* THREAD */ diff --git a/mysql-test/r/myisam.result b/mysql-test/r/myisam.result index 7aaf3c8f85a..cb3e6afffa4 100644 --- a/mysql-test/r/myisam.result +++ b/mysql-test/r/myisam.result @@ -756,3 +756,129 @@ a b xxxxxxxxx bbbbbb xxxxxxxxx bbbbbb DROP TABLE t1; +SET @@myisam_repair_threads=2; +SHOW VARIABLES LIKE 'myisam_repair%'; +Variable_name Value +myisam_repair_threads 2 +CREATE TABLE t1 ( +`_id` int(11) NOT NULL default '0', +`url` text, +`email` text, +`description` text, +`loverlap` int(11) default NULL, +`roverlap` int(11) default NULL, +`lneighbor_id` int(11) default NULL, +`rneighbor_id` int(11) default NULL, +`length_` int(11) default NULL, +`sequence` mediumtext, +`name` text, +`_obj_class` text NOT NULL, +PRIMARY KEY (`_id`), +UNIQUE KEY `sequence_name_index` (`name`(50)), +KEY (`length_`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; +INSERT INTO t1 VALUES +(1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), +(2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), +(3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), +(4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), +(5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), +(6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), +(7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), +(8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), +(9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +SELECT _id FROM t1; +_id +1 +2 +3 +4 +5 +6 +7 +8 +9 +DELETE FROM t1 WHERE _id < 8; +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MyISAM 9 Dynamic 2 # # # # 140 # # # # # # +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +OPTIMIZE TABLE t1; +Table Op Msg_type Msg_text +test.t1 optimize status OK +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MyISAM 9 Dynamic 2 # # # # 0 # # # # # # +SELECT _id FROM t1; +_id +8 +9 +DROP TABLE t1; +CREATE TABLE t1 ( +`_id` int(11) NOT NULL default '0', +`url` text, +`email` text, +`description` text, +`loverlap` int(11) default NULL, +`roverlap` int(11) default NULL, +`lneighbor_id` int(11) default NULL, +`rneighbor_id` int(11) default NULL, +`length_` int(11) default NULL, +`sequence` mediumtext, +`name` text, +`_obj_class` text NOT NULL, +PRIMARY KEY (`_id`), +UNIQUE KEY `sequence_name_index` (`name`(50)), +KEY (`length_`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; +INSERT INTO t1 VALUES +(1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), +(2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), +(3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), +(4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), +(5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), +(6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), +(7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), +(8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), +(9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +SELECT _id FROM t1; +_id +1 +2 +3 +4 +5 +6 +7 +8 +9 +DELETE FROM t1 WHERE _id < 8; +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MyISAM 9 Dynamic 2 # # # # 140 # # # # # # +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +REPAIR TABLE t1 QUICK; +Table Op Msg_type Msg_text +test.t1 repair status OK +CHECK TABLE t1 EXTENDED; +Table Op Msg_type Msg_text +test.t1 check status OK +SHOW TABLE STATUS LIKE 't1'; +Name Engine Version Row_format Rows Avg_row_length Data_length Max_data_length Index_length Data_free Auto_increment Create_time Update_time Check_time Collation Checksum Create_options Comment +t1 MyISAM 9 Dynamic 2 # # # # 140 # # # # # # +SELECT _id FROM t1; +_id +8 +9 +DROP TABLE t1; +SET @@myisam_repair_threads=1; +SHOW VARIABLES LIKE 'myisam_repair%'; +Variable_name Value +myisam_repair_threads 1 diff --git a/mysql-test/t/myisam.test b/mysql-test/t/myisam.test index 5f948973141..0b9fad07ac4 100644 --- a/mysql-test/t/myisam.test +++ b/mysql-test/t/myisam.test @@ -714,4 +714,97 @@ UPDATE t1 AS ta1,t1 AS ta2 SET ta1.b='aaaaaa',ta2.b='bbbbbb'; SELECT * FROM t1; DROP TABLE t1; +# +# Bug#8283 - OPTIMIZE TABLE causes data loss +# +SET @@myisam_repair_threads=2; +SHOW VARIABLES LIKE 'myisam_repair%'; +# +# Test OPTIMIZE. This creates a new data file. +CREATE TABLE t1 ( + `_id` int(11) NOT NULL default '0', + `url` text, + `email` text, + `description` text, + `loverlap` int(11) default NULL, + `roverlap` int(11) default NULL, + `lneighbor_id` int(11) default NULL, + `rneighbor_id` int(11) default NULL, + `length_` int(11) default NULL, + `sequence` mediumtext, + `name` text, + `_obj_class` text NOT NULL, + PRIMARY KEY (`_id`), + UNIQUE KEY `sequence_name_index` (`name`(50)), + KEY (`length_`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; +# +INSERT INTO t1 VALUES + (1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), + (2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), + (3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), + (4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), + (5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), + (6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), + (7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), + (8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), + (9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +# +SELECT _id FROM t1; +DELETE FROM t1 WHERE _id < 8; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +CHECK TABLE t1 EXTENDED; +OPTIMIZE TABLE t1; +CHECK TABLE t1 EXTENDED; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +SELECT _id FROM t1; +DROP TABLE t1; +# +# Test REPAIR QUICK. This retains the old data file. +CREATE TABLE t1 ( + `_id` int(11) NOT NULL default '0', + `url` text, + `email` text, + `description` text, + `loverlap` int(11) default NULL, + `roverlap` int(11) default NULL, + `lneighbor_id` int(11) default NULL, + `rneighbor_id` int(11) default NULL, + `length_` int(11) default NULL, + `sequence` mediumtext, + `name` text, + `_obj_class` text NOT NULL, + PRIMARY KEY (`_id`), + UNIQUE KEY `sequence_name_index` (`name`(50)), + KEY (`length_`) +) ENGINE=MyISAM DEFAULT CHARSET=latin1; +# +INSERT INTO t1 VALUES + (1,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample1',''), + (2,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample2',''), + (3,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample3',''), + (4,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample4',''), + (5,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample5',''), + (6,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample6',''), + (7,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample7',''), + (8,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample8',''), + (9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'sample9',''); +# +SELECT _id FROM t1; +DELETE FROM t1 WHERE _id < 8; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +CHECK TABLE t1 EXTENDED; +REPAIR TABLE t1 QUICK; +CHECK TABLE t1 EXTENDED; +--replace_column 6 # 7 # 8 # 9 # 11 # 12 # 13 # 14 # 15 # 16 # +SHOW TABLE STATUS LIKE 't1'; +SELECT _id FROM t1; +DROP TABLE t1; +# +SET @@myisam_repair_threads=1; +SHOW VARIABLES LIKE 'myisam_repair%'; + # End of 4.1 tests diff --git a/mysys/mf_iocache.c b/mysys/mf_iocache.c index 0007784c2b2..58d4756b702 100644 --- a/mysys/mf_iocache.c +++ b/mysys/mf_iocache.c @@ -70,7 +70,6 @@ static void my_aiowait(my_aio_result *result); #define IO_ROUND_UP(X) (((X)+IO_SIZE-1) & ~(IO_SIZE-1)) #define IO_ROUND_DN(X) ( (X) & ~(IO_SIZE-1)) - /* Setup internal pointers inside IO_CACHE @@ -500,65 +499,366 @@ int _my_b_read(register IO_CACHE *info, byte *Buffer, uint Count) DBUG_RETURN(0); } + #ifdef THREAD -/* Prepare IO_CACHE for shared use */ -void init_io_cache_share(IO_CACHE *info, IO_CACHE_SHARE *s, uint num_threads) +/* + Prepare IO_CACHE for shared use. + + SYNOPSIS + init_io_cache_share() + read_cache A read cache. This will be copied for + every thread after setup. + cshare The share. + write_cache If non-NULL a write cache that is to be + synchronized with the read caches. + num_threads Number of threads sharing the cache + including the write thread if any. + + DESCRIPTION + + The shared cache is used so: One IO_CACHE is initialized with + init_io_cache(). This includes the allocation of a buffer. Then a + share is allocated and init_io_cache_share() is called with the io + cache and the share. Then the io cache is copied for each thread. So + every thread has its own copy of IO_CACHE. But the allocated buffer + is shared because cache->buffer is the same for all caches. + + One thread reads data from the file into the buffer. All threads + read from the buffer, but every thread maintains its own set of + pointers into the buffer. When all threads have used up the buffer + contents, one of the threads reads the next block of data into the + buffer. To accomplish this, each thread enters the cache lock before + accessing the buffer. They wait in lock_io_cache() until all threads + joined the lock. The last thread entering the lock is in charge of + reading from file to buffer. It wakes all threads when done. + + Synchronizing a write cache to the read caches works so: Whenever + the write buffer needs a flush, the write thread enters the lock and + waits for all other threads to enter the lock too. They do this when + they have used up the read buffer. When all threads are in the lock, + the write thread copies the write buffer to the read buffer and + wakes all threads. + + share->running_threads is the number of threads not being in the + cache lock. When entering lock_io_cache() the number is decreased. + When the thread that fills the buffer enters unlock_io_cache() the + number is reset to the number of threads. The condition + running_threads == 0 means that all threads are in the lock. Bumping + up the number to the full count is non-intuitive. But increasing the + number by one for each thread that leaves the lock could lead to a + solo run of one thread. The last thread to join a lock reads from + file to buffer, wakes the other threads, processes the data in the + cache and enters the lock again. If no other thread left the lock + meanwhile, it would think it's the last one again and read the next + block... + + The share has copies of 'error', 'buffer', 'read_end', and + 'pos_in_file' from the thread that filled the buffer. We may not be + able to access this information directly from its cache because the + thread may be removed from the share before the variables could be + copied by all other threads. Or, if a write buffer is synchronized, + it would change its 'pos_in_file' after waking the other threads, + possibly before they could copy its value. + + However, the 'buffer' variable in the share is for a synchronized + write cache. It needs to know where to put the data. Otherwise it + would need access to the read cache of one of the threads that is + not yet removed from the share. + + RETURN + void +*/ + +void init_io_cache_share(IO_CACHE *read_cache, IO_CACHE_SHARE *cshare, + IO_CACHE *write_cache, uint num_threads) { - DBUG_ASSERT(info->type == READ_CACHE); - pthread_mutex_init(&s->mutex, MY_MUTEX_INIT_FAST); - pthread_cond_init (&s->cond, 0); - s->total=s->count=num_threads-1; - s->active=0; - info->share=s; - info->read_function=_my_b_read_r; - info->current_pos= info->current_end= 0; + DBUG_ENTER("init_io_cache_share"); + DBUG_PRINT("io_cache_share", ("read_cache: 0x%lx share: 0x%lx " + "write_cache: 0x%lx threads: %u", + read_cache, cshare, write_cache, num_threads)); + + DBUG_ASSERT(num_threads > 1); + DBUG_ASSERT(read_cache->type == READ_CACHE); + DBUG_ASSERT(!write_cache || (write_cache->type == WRITE_CACHE)); + + pthread_mutex_init(&cshare->mutex, MY_MUTEX_INIT_FAST); + pthread_cond_init(&cshare->cond, 0); + pthread_cond_init(&cshare->cond_writer, 0); + + cshare->running_threads= num_threads; + cshare->total_threads= num_threads; + cshare->error= 0; /* Initialize. */ + cshare->buffer= read_cache->buffer; + cshare->read_end= NULL; /* See function comment of lock_io_cache(). */ + cshare->pos_in_file= 0; /* See function comment of lock_io_cache(). */ + cshare->source_cache= write_cache; /* Can be NULL. */ + + read_cache->share= cshare; + read_cache->read_function= _my_b_read_r; + read_cache->current_pos= NULL; + read_cache->current_end= NULL; + + if (write_cache) + write_cache->share= cshare; + + DBUG_VOID_RETURN; } + /* - Remove a thread from shared access to IO_CACHE - Every thread should do that on exit for not - to deadlock other threads + Remove a thread from shared access to IO_CACHE. + + SYNOPSIS + remove_io_thread() + cache The IO_CACHE to be removed from the share. + + NOTE + + Every thread must do that on exit for not to deadlock other threads. + + The last thread destroys the pthread resources. + + A writer flushes its cache first. + + RETURN + void */ -void remove_io_thread(IO_CACHE *info) + +void remove_io_thread(IO_CACHE *cache) { - IO_CACHE_SHARE *s=info->share; + IO_CACHE_SHARE *cshare= cache->share; + uint total; + DBUG_ENTER("remove_io_thread"); - pthread_mutex_lock(&s->mutex); - s->total--; - if (! s->count--) - pthread_cond_signal(&s->cond); - pthread_mutex_unlock(&s->mutex); -} + /* If the writer goes, it needs to flush the write cache. */ + if (cache == cshare->source_cache) + flush_io_cache(cache); -static int lock_io_cache(IO_CACHE *info, my_off_t pos) -{ - int total; - IO_CACHE_SHARE *s=info->share; + pthread_mutex_lock(&cshare->mutex); + DBUG_PRINT("io_cache_share", ("%s: 0x%lx", + (cache == cshare->source_cache) ? + "writer" : "reader", cache)); - pthread_mutex_lock(&s->mutex); - if (!s->count) + /* Remove from share. */ + total= --cshare->total_threads; + DBUG_PRINT("io_cache_share", ("remaining threads: %u", total)); + + /* Detach from share. */ + cache->share= NULL; + + /* If the writer goes, let the readers know. */ + if (cache == cshare->source_cache) { - s->count=s->total; - return 1; + DBUG_PRINT("io_cache_share", ("writer leaves")); + cshare->source_cache= NULL; } - total=s->total; - s->count--; - while (!s->active || s->active->pos_in_file < pos) - pthread_cond_wait(&s->cond, &s->mutex); + /* If all threads are waiting for me to join the lock, wake them. */ + if (!--cshare->running_threads) + { + DBUG_PRINT("io_cache_share", ("the last running thread leaves, wake all")); + pthread_cond_signal(&cshare->cond_writer); + pthread_cond_broadcast(&cshare->cond); + } - if (s->total < total && - (!s->active || s->active->pos_in_file < pos)) - return 1; + pthread_mutex_unlock(&cshare->mutex); - pthread_mutex_unlock(&s->mutex); - return 0; + if (!total) + { + DBUG_PRINT("io_cache_share", ("last thread removed, destroy share")); + pthread_cond_destroy (&cshare->cond_writer); + pthread_cond_destroy (&cshare->cond); + pthread_mutex_destroy(&cshare->mutex); + } + + DBUG_VOID_RETURN; } -static void unlock_io_cache(IO_CACHE *info) + +/* + Lock IO cache and wait for all other threads to join. + + SYNOPSIS + lock_io_cache() + cache The cache of the thread entering the lock. + pos File position of the block to read. + Unused for the write thread. + + DESCRIPTION + + Wait for all threads to finish with the current buffer. We want + all threads to proceed in concert. The last thread to join + lock_io_cache() will read the block from file and all threads start + to use it. Then they will join again for reading the next block. + + The waiting threads detect a fresh buffer by comparing + cshare->pos_in_file with the position they want to process next. + Since the first block may start at position 0, we take + cshare->read_end as an additional condition. This variable is + initialized to NULL and will be set after a block of data is written + to the buffer. + + RETURN + 1 OK, lock in place, go ahead and read. + 0 OK, unlocked, another thread did the read. +*/ + +static int lock_io_cache(IO_CACHE *cache, my_off_t pos) { - pthread_cond_broadcast(&info->share->cond); - pthread_mutex_unlock(&info->share->mutex); + IO_CACHE_SHARE *cshare= cache->share; + DBUG_ENTER("lock_io_cache"); + + /* Enter the lock. */ + pthread_mutex_lock(&cshare->mutex); + cshare->running_threads--; + DBUG_PRINT("io_cache_share", ("%s: 0x%lx pos: %lu running: %u", + (cache == cshare->source_cache) ? + "writer" : "reader", cache, (ulong) pos, + cshare->running_threads)); + + if (cshare->source_cache) + { + /* A write cache is synchronized to the read caches. */ + + if (cache == cshare->source_cache) + { + /* The writer waits until all readers are here. */ + while (cshare->running_threads) + { + DBUG_PRINT("io_cache_share", ("writer waits in lock")); + pthread_cond_wait(&cshare->cond_writer, &cshare->mutex); + } + DBUG_PRINT("io_cache_share", ("writer awoke, going to copy")); + + /* Stay locked. Leave the lock later by unlock_io_cache(). */ + DBUG_RETURN(1); + } + + /* The last thread wakes the writer. */ + if (!cshare->running_threads) + { + DBUG_PRINT("io_cache_share", ("waking writer")); + pthread_cond_signal(&cshare->cond_writer); + } + + /* + Readers wait until the data is copied from the writer. Another + reason to stop waiting is the removal of the write thread. If this + happens, we leave the lock with old data in the buffer. + */ + while ((!cshare->read_end || (cshare->pos_in_file < pos)) && + cshare->source_cache) + { + DBUG_PRINT("io_cache_share", ("reader waits in lock")); + pthread_cond_wait(&cshare->cond, &cshare->mutex); + } + + /* + If the writer was removed from the share while this thread was + asleep, we need to simulate an EOF condition. The writer cannot + reset the share variables as they might still be in use by readers + of the last block. When we awake here then because the last + joining thread signalled us. If the writer is not the last, it + will not signal. So it is safe to clear the buffer here. + */ + if (!cshare->read_end || (cshare->pos_in_file < pos)) + { + DBUG_PRINT("io_cache_share", ("reader found writer removed. EOF")); + cshare->read_end= cshare->buffer; /* Empty buffer. */ + cshare->error= 0; /* EOF is not an error. */ + } + } + else + { + /* + There are read caches only. The last thread arriving in + lock_io_cache() continues with a locked cache and reads the block. + */ + if (!cshare->running_threads) + { + DBUG_PRINT("io_cache_share", ("last thread joined, going to read")); + /* Stay locked. Leave the lock later by unlock_io_cache(). */ + DBUG_RETURN(1); + } + + /* + All other threads wait until the requested block is read by the + last thread arriving. Another reason to stop waiting is the + removal of a thread. If this leads to all threads being in the + lock, we have to continue also. The first of the awaken threads + will then do the read. + */ + while ((!cshare->read_end || (cshare->pos_in_file < pos)) && + cshare->running_threads) + { + DBUG_PRINT("io_cache_share", ("reader waits in lock")); + pthread_cond_wait(&cshare->cond, &cshare->mutex); + } + + /* If the block is not yet read, continue with a locked cache and read. */ + if (!cshare->read_end || (cshare->pos_in_file < pos)) + { + DBUG_PRINT("io_cache_share", ("reader awoke, going to read")); + /* Stay locked. Leave the lock later by unlock_io_cache(). */ + DBUG_RETURN(1); + } + + /* Another thread did read the block already. */ + } + DBUG_PRINT("io_cache_share", ("reader awoke, going to process %u bytes", + cshare->read_end ? (uint) + (cshare->read_end - cshare->buffer) : 0)); + + /* + Leave the lock. Do not call unlock_io_cache() later. The thread that + filled the buffer did this and marked all threads as running. + */ + pthread_mutex_unlock(&cshare->mutex); + DBUG_RETURN(0); +} + + +/* + Unlock IO cache. + + SYNOPSIS + unlock_io_cache() + cache The cache of the thread leaving the lock. + + NOTE + This is called by the thread that filled the buffer. It marks all + threads as running and awakes them. This must not be done by any + other thread. + + Do not signal cond_writer. Either there is no writer or the writer + is the only one who can call this function. + + The reason for resetting running_threads to total_threads before + waking all other threads is that it could be possible that this + thread is so fast with processing the buffer that it enters the lock + before even one other thread has left it. If every awoken thread + would increase running_threads by one, this thread could think that + he is again the last to join and would not wait for the other + threads to process the data. + + RETURN + void +*/ + +static void unlock_io_cache(IO_CACHE *cache) +{ + IO_CACHE_SHARE *cshare= cache->share; + DBUG_ENTER("unlock_io_cache"); + DBUG_PRINT("io_cache_share", ("%s: 0x%lx pos: %lu running: %u", + (cache == cshare->source_cache) ? + "writer" : "reader", + cache, (ulong) cshare->pos_in_file, + cshare->total_threads)); + + cshare->running_threads= cshare->total_threads; + pthread_cond_broadcast(&cshare->cond); + pthread_mutex_unlock(&cshare->mutex); + DBUG_VOID_RETURN; } @@ -567,7 +867,7 @@ static void unlock_io_cache(IO_CACHE *info) SYNOPSIS _my_b_read_r() - info IO_CACHE pointer + cache IO_CACHE pointer Buffer Buffer to retrieve count bytes from file Count Number of bytes to read into Buffer @@ -579,7 +879,7 @@ static void unlock_io_cache(IO_CACHE *info) It works as follows: when a thread tries to read from a file (that is, after using all the data from the (shared) buffer), it just - hangs on lock_io_cache(), wating for other threads. When the very + hangs on lock_io_cache(), waiting for other threads. When the very last thread attempts a read, lock_io_cache() returns 1, the thread does actual IO and unlock_io_cache(), which signals all the waiting threads that data is in the buffer. @@ -599,16 +899,17 @@ static void unlock_io_cache(IO_CACHE *info) 1 Error: can't read requested characters */ -int _my_b_read_r(register IO_CACHE *info, byte *Buffer, uint Count) +int _my_b_read_r(register IO_CACHE *cache, byte *Buffer, uint Count) { my_off_t pos_in_file; uint length, diff_length, left_length; + IO_CACHE_SHARE *cshare= cache->share; DBUG_ENTER("_my_b_read_r"); - if ((left_length= (uint) (info->read_end - info->read_pos))) + if ((left_length= (uint) (cache->read_end - cache->read_pos))) { DBUG_ASSERT(Count >= left_length); /* User is not using my_b_read() */ - memcpy(Buffer, info->read_pos, (size_t) (left_length)); + memcpy(Buffer, cache->read_pos, (size_t) (left_length)); Buffer+= left_length; Count-= left_length; } @@ -616,55 +917,133 @@ int _my_b_read_r(register IO_CACHE *info, byte *Buffer, uint Count) { int cnt, len; - pos_in_file= info->pos_in_file + (info->read_end - info->buffer); + pos_in_file= cache->pos_in_file + (cache->read_end - cache->buffer); diff_length= (uint) (pos_in_file & (IO_SIZE-1)); length=IO_ROUND_UP(Count+diff_length)-diff_length; - length=(length <= info->read_length) ? - length + IO_ROUND_DN(info->read_length - length) : - length - IO_ROUND_UP(length - info->read_length) ; - if (info->type != READ_FIFO && - (length > (info->end_of_file - pos_in_file))) - length= (uint) (info->end_of_file - pos_in_file); + length= ((length <= cache->read_length) ? + length + IO_ROUND_DN(cache->read_length - length) : + length - IO_ROUND_UP(length - cache->read_length)); + if (cache->type != READ_FIFO && + (length > (cache->end_of_file - pos_in_file))) + length= (uint) (cache->end_of_file - pos_in_file); if (length == 0) { - info->error= (int) left_length; + cache->error= (int) left_length; DBUG_RETURN(1); } - if (lock_io_cache(info, pos_in_file)) + if (lock_io_cache(cache, pos_in_file)) { - info->share->active=info; - if (info->seek_not_done) /* File touched, do seek */ - VOID(my_seek(info->file,pos_in_file,MY_SEEK_SET,MYF(0))); - len=(int)my_read(info->file,info->buffer, length, info->myflags); - info->read_end=info->buffer + (len == -1 ? 0 : len); - info->error=(len == (int)length ? 0 : len); - info->pos_in_file=pos_in_file; - unlock_io_cache(info); + /* With a synchronized write/read cache we won't come here... */ + DBUG_ASSERT(!cshare->source_cache); + /* + ... unless the writer has gone before this thread entered the + lock. Simulate EOF in this case. It can be distinguished by + cache->file. + */ + if (cache->file < 0) + len= 0; + else + { + if (cache->seek_not_done) /* File touched, do seek */ + VOID(my_seek(cache->file, pos_in_file, MY_SEEK_SET, MYF(0))); + len= (int) my_read(cache->file, cache->buffer, length, cache->myflags); + } + DBUG_PRINT("io_cache_share", ("read %d bytes", len)); + + cache->read_end= cache->buffer + (len == -1 ? 0 : len); + cache->error= (len == (int)length ? 0 : len); + cache->pos_in_file= pos_in_file; + + /* Copy important values to the share. */ + cshare->error= cache->error; + cshare->read_end= cache->read_end; + cshare->pos_in_file= pos_in_file; + + /* Mark all threads as running and wake them. */ + unlock_io_cache(cache); } else { - info->error= info->share->active->error; - info->read_end= info->share->active->read_end; - info->pos_in_file= info->share->active->pos_in_file; - len= (info->error == -1 ? -1 : info->read_end-info->buffer); + /* + With a synchronized write/read cache readers always come here. + Copy important values from the share. + */ + cache->error= cshare->error; + cache->read_end= cshare->read_end; + cache->pos_in_file= cshare->pos_in_file; + + len= ((cache->error == -1) ? -1 : cache->read_end - cache->buffer); } - info->read_pos=info->buffer; - info->seek_not_done=0; + cache->read_pos= cache->buffer; + cache->seek_not_done= 0; if (len <= 0) { - info->error= (int) left_length; + DBUG_PRINT("io_cache_share", ("reader error. len %d left %u", + len, left_length)); + cache->error= (int) left_length; DBUG_RETURN(1); } cnt= ((uint) len > Count) ? (int) Count : len; - memcpy(Buffer, info->read_pos, (size_t) cnt); + memcpy(Buffer, cache->read_pos, (size_t) cnt); Count -= cnt; Buffer+= cnt; left_length+= cnt; - info->read_pos+= cnt; + cache->read_pos+= cnt; } DBUG_RETURN(0); } -#endif + + +/* + Copy data from write cache to read cache. + + SYNOPSIS + copy_to_read_buffer() + write_cache The write cache. + write_buffer The source of data, mostly the cache buffer. + write_length The number of bytes to copy. + + NOTE + The write thread will wait for all read threads to join the cache + lock. Then it copies the data over and wakes the read threads. + + RETURN + void +*/ + +static void copy_to_read_buffer(IO_CACHE *write_cache, + const byte *write_buffer, uint write_length) +{ + IO_CACHE_SHARE *cshare= write_cache->share; + + DBUG_ASSERT(cshare->source_cache == write_cache); + /* + write_length is usually less or equal to buffer_length. + It can be bigger if _my_b_write() is called with a big length. + */ + while (write_length) + { + uint copy_length= min(write_length, write_cache->buffer_length); + int __attribute__((unused)) rc; + + rc= lock_io_cache(write_cache, write_cache->pos_in_file); + /* The writing thread does always have the lock when it awakes. */ + DBUG_ASSERT(rc); + + memcpy(cshare->buffer, write_buffer, copy_length); + + cshare->error= 0; + cshare->read_end= cshare->buffer + copy_length; + cshare->pos_in_file= write_cache->pos_in_file; + + /* Mark all threads as running and wake them. */ + unlock_io_cache(write_cache); + + write_buffer+= copy_length; + write_length-= copy_length; + } +} +#endif /*THREAD*/ /* @@ -1016,6 +1395,7 @@ int _my_b_write(register IO_CACHE *info, const byte *Buffer, uint Count) Buffer+=rest_length; Count-=rest_length; info->write_pos+=rest_length; + if (flush_io_cache(info)) return 1; if (Count >= IO_SIZE) @@ -1028,6 +1408,23 @@ int _my_b_write(register IO_CACHE *info, const byte *Buffer, uint Count) } if (my_write(info->file,Buffer,(uint) length,info->myflags | MY_NABP)) return info->error= -1; + +#ifdef THREAD + /* + In case of a shared I/O cache with a writer we normally do direct + write cache to read cache copy. Simulate this here by direct + caller buffer to read cache copy. Do it after the write so that + the cache readers actions on the flushed part can go in parallel + with the write of the extra stuff. copy_to_read_buffer() + synchronizes writer and readers so that after this call the + readers can act on the extra stuff while the writer can go ahead + and prepare the next output. copy_to_read_buffer() relies on + info->pos_in_file. + */ + if (info->share) + copy_to_read_buffer(info, Buffer, length); +#endif + Count-=length; Buffer+=length; info->pos_in_file+=length; @@ -1048,6 +1445,14 @@ int my_b_append(register IO_CACHE *info, const byte *Buffer, uint Count) { uint rest_length,length; +#ifdef THREAD + /* + Assert that we cannot come here with a shared cache. If we do one + day, we might need to add a call to copy_to_read_buffer(). + */ + DBUG_ASSERT(!info->share); +#endif + lock_append_buffer(info); rest_length=(uint) (info->write_end - info->write_pos); if (Count <= rest_length) @@ -1108,6 +1513,14 @@ int my_block_write(register IO_CACHE *info, const byte *Buffer, uint Count, uint length; int error=0; +#ifdef THREAD + /* + Assert that we cannot come here with a shared cache. If we do one + day, we might need to add a call to copy_to_read_buffer(). + */ + DBUG_ASSERT(!info->share); +#endif + if (pos < info->pos_in_file) { /* Of no overlap, write everything without buffering */ @@ -1184,6 +1597,17 @@ int my_b_flush_io_cache(IO_CACHE *info, int need_append_buffer_lock) if ((length=(uint) (info->write_pos - info->write_buffer))) { +#ifdef THREAD + /* + In case of a shared I/O cache with a writer we do direct write + cache to read cache copy. Do it before the write here so that + the readers can work in parallel with the write. + copy_to_read_buffer() relies on info->pos_in_file. + */ + if (info->share) + copy_to_read_buffer(info, info->write_buffer, length); +#endif + pos_in_file=info->pos_in_file; /* If we have append cache, we always open the file with @@ -1262,16 +1686,10 @@ int end_io_cache(IO_CACHE *info) #ifdef THREAD /* - if IO_CACHE is shared between several threads, only one - thread needs to call end_io_cache() - just as init_io_cache() - should be called only once and then memcopy'ed + Every thread must call remove_io_thread(). The last one destroys + the share elements. */ - if (info->share) - { - pthread_cond_destroy (&info->share->cond); - pthread_mutex_destroy(&info->share->mutex); - info->share=0; - } + DBUG_ASSERT(!info->share || !info->share->total_threads); #endif if ((pre_close=info->pre_close)) From 296d64dbc3376aeb01c837459502e72225330074 Mon Sep 17 00:00:00 2001 From: "istruewing@chilla.local" <> Date: Wed, 11 Oct 2006 12:24:59 +0200 Subject: [PATCH 5/8] Bug#12240 - Rows Examined in Slow Log showing incorrect number? Examined rows are counted for every join part. The per-join-part counter was incremented over all iterations. The result variable was replaced at the end of every iteration. The final result was the number of examined rows by the join part that ended its execution as the last one. The numbers of other join parts was lost. Now we reset the per-join-part counter before every iteration and add it to the result variable at the end of the iteration. That way we get the sum of all iterations of all join parts. No test case. Testing this needs a look into the slow query log. I don't know of a way to do this portably with the test suite. --- sql/sql_select.cc | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/sql/sql_select.cc b/sql/sql_select.cc index 74a9fc573c4..5a5c7663365 100644 --- a/sql/sql_select.cc +++ b/sql/sql_select.cc @@ -1171,12 +1171,13 @@ JOIN::exec() thd->examined_row_count= 0; DBUG_VOID_RETURN; } - /* - don't reset the found rows count if there're no tables - as FOUND_ROWS() may be called. - */ + /* + Don't reset the found rows count if there're no tables as + FOUND_ROWS() may be called. Never reset the examined row count here. + It must be accumulated from all join iterations of all join parts. + */ if (tables) - thd->limit_found_rows= thd->examined_row_count= 0; + thd->limit_found_rows= 0; if (zero_result_cause) { @@ -1224,6 +1225,12 @@ JOIN::exec() List *curr_all_fields= &all_fields; List *curr_fields_list= &fields_list; TABLE *curr_tmp_table= 0; + /* + Initialize examined rows here because the values from all join parts + must be accumulated in examined_row_count. Hence every join + iteration must count from zero. + */ + curr_join->examined_rows= 0; /* Create a tmp table if distinct or if the sort is too complicated */ if (need_tmp) @@ -1572,7 +1579,10 @@ JOIN::exec() error= thd->net.report_error ? -1 : do_select(curr_join, curr_fields_list, NULL, procedure); thd->limit_found_rows= curr_join->send_records; - thd->examined_row_count= curr_join->examined_rows; + /* Accumulate the counts from all join iterations of all join parts. */ + thd->examined_row_count+= curr_join->examined_rows; + DBUG_PRINT("counts", ("thd->examined_row_count: %lu", + (ulong) thd->examined_row_count)); DBUG_VOID_RETURN; } @@ -6179,6 +6189,8 @@ sub_select(JOIN *join,JOIN_TAB *join_tab,bool end_of_records) return -2; /* purecov: inspected */ } join->examined_rows++; + DBUG_PRINT("counts", ("join->examined_rows++: %lu", + (ulong) join->examined_rows)); join->thd->row_count++; if (!on_expr || on_expr->val_int()) { From 62b2044279e3142a8947b9bb44af8b40a963dc1e Mon Sep 17 00:00:00 2001 From: "svoj@mysql.com/april.(none)" <> Date: Wed, 11 Oct 2006 20:34:20 +0500 Subject: [PATCH 6/8] BUG#22562 - REPAIR TABLE .. USE_FRM causes server crash on Windows and server hangs on Linux If REPAIR TABLE ... USE_FRM is issued for table that is located in different than default database server crash could happen. In reopen_name_locked_table take database name from table_list (user specified or default database) instead of from thd (default database). Affects 4.1 only. --- mysql-test/r/repair.result | 7 +++++++ mysql-test/t/repair.test | 10 ++++++++++ sql/sql_base.cc | 2 +- 3 files changed, 18 insertions(+), 1 deletion(-) diff --git a/mysql-test/r/repair.result b/mysql-test/r/repair.result index c069824e9f0..815305013be 100644 --- a/mysql-test/r/repair.result +++ b/mysql-test/r/repair.result @@ -48,3 +48,10 @@ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_par t1 1 a 1 a A 5 NULL NULL YES BTREE SET myisam_repair_threads=@@global.myisam_repair_threads; DROP TABLE t1; +CREATE TABLE t1(a INT); +USE mysql; +REPAIR TABLE test.t1 USE_FRM; +Table Op Msg_type Msg_text +test.t1 repair status OK +USE test; +DROP TABLE t1; diff --git a/mysql-test/t/repair.test b/mysql-test/t/repair.test index f086d5b0c2a..91d03ce3082 100644 --- a/mysql-test/t/repair.test +++ b/mysql-test/t/repair.test @@ -45,4 +45,14 @@ SHOW INDEX FROM t1; SET myisam_repair_threads=@@global.myisam_repair_threads; DROP TABLE t1; +# +# BUG#22562 - REPAIR TABLE .. USE_FRM causes server crash on Windows and +# server hangs on Linux +# +CREATE TABLE t1(a INT); +USE mysql; +REPAIR TABLE test.t1 USE_FRM; +USE test; +DROP TABLE t1; + # End of 4.1 tests diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 0a9529d6067..561d75a765e 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -821,7 +821,7 @@ TABLE *reopen_name_locked_table(THD* thd, TABLE_LIST* table_list) if (!(table = table_list->table)) DBUG_RETURN(0); - char* db = thd->db ? thd->db : table_list->db; + char *db= table_list->db; char* table_name = table_list->real_name; char key[MAX_DBKEY_LENGTH]; uint key_length; From 115616381dd2238d4889271357d7cae228ae09fa Mon Sep 17 00:00:00 2001 From: "gkodinov/kgeorge@macbook.gmz" <> Date: Mon, 16 Oct 2006 13:10:25 +0300 Subject: [PATCH 7/8] BUG#14019 : group by converts literal string to column name When resolving unqualified name references MySQL was not checking what is the item type for the reference. Thus e.g a string literal item that has by convention a name equal to its string value will also work as a reference to a SELECT list item or a table field. Fixed by allowing only Item_ref or Item_field to referenced by (unqualified) name. --- mysql-test/r/func_gconcat.result | 5 ---- mysql-test/r/group_by.result | 45 ++++++++++++++++++++++++++++++++ mysql-test/t/func_gconcat.test | 1 - mysql-test/t/group_by.test | 23 ++++++++++++++++ sql/sql_base.cc | 11 ++++++-- 5 files changed, 77 insertions(+), 8 deletions(-) diff --git a/mysql-test/r/func_gconcat.result b/mysql-test/r/func_gconcat.result index db0125b7d4f..d4a46bfd79f 100644 --- a/mysql-test/r/func_gconcat.result +++ b/mysql-test/r/func_gconcat.result @@ -74,11 +74,6 @@ grp group_concat(c order by 1) 1 a 2 b,c 3 C,D,d,d,D,E -select grp,group_concat(c order by "c") from t1 group by grp; -grp group_concat(c order by "c") -1 a -2 b,c -3 C,D,d,d,D,E select grp,group_concat(distinct c order by c) from t1 group by grp; grp group_concat(distinct c order by c) 1 a diff --git a/mysql-test/r/group_by.result b/mysql-test/r/group_by.result index 4ad28091164..61b73dc7005 100644 --- a/mysql-test/r/group_by.result +++ b/mysql-test/r/group_by.result @@ -773,3 +773,48 @@ select sql_buffer_result max(f1)+1 from t1; max(f1)+1 3 drop table t1; +CREATE TABLE t1(a INT); +INSERT INTO t1 VALUES (1),(2); +SELECT a FROM t1 GROUP BY 'a'; +a +1 +SELECT a FROM t1 GROUP BY "a"; +a +1 +SELECT a FROM t1 GROUP BY `a`; +a +1 +2 +set sql_mode=ANSI_QUOTES; +SELECT a FROM t1 GROUP BY "a"; +a +1 +2 +SELECT a FROM t1 GROUP BY 'a'; +a +1 +SELECT a FROM t1 GROUP BY `a`; +a +1 +2 +set sql_mode=''; +SELECT a FROM t1 HAVING 'a' > 1; +a +SELECT a FROM t1 HAVING "a" > 1; +a +SELECT a FROM t1 HAVING `a` > 1; +a +2 +SELECT a FROM t1 ORDER BY 'a' DESC; +a +1 +2 +SELECT a FROM t1 ORDER BY "a" DESC; +a +1 +2 +SELECT a FROM t1 ORDER BY `a` DESC; +a +2 +1 +DROP TABLE t1; diff --git a/mysql-test/t/func_gconcat.test b/mysql-test/t/func_gconcat.test index 8f50690dd8b..d51d88d50ef 100644 --- a/mysql-test/t/func_gconcat.test +++ b/mysql-test/t/func_gconcat.test @@ -32,7 +32,6 @@ select grp,group_concat(d order by a desc) from t1 group by grp; select grp,group_concat(a order by a,d+c-ascii(c)-a) from t1 group by grp; select grp,group_concat(a order by d+c-ascii(c),a) from t1 group by grp; select grp,group_concat(c order by 1) from t1 group by grp; -select grp,group_concat(c order by "c") from t1 group by grp; select grp,group_concat(distinct c order by c) from t1 group by grp; select grp,group_concat(distinct c order by c desc) from t1 group by grp; explain extended select grp,group_concat(distinct c order by c desc) from t1 group by grp; diff --git a/mysql-test/t/group_by.test b/mysql-test/t/group_by.test index f14fab2d30e..064d46aa0c0 100644 --- a/mysql-test/t/group_by.test +++ b/mysql-test/t/group_by.test @@ -610,4 +610,27 @@ select sql_buffer_result max(f1) is null from t1; select sql_buffer_result max(f1)+1 from t1; drop table t1; +# +# BUG#14019-4.1-opt +# +CREATE TABLE t1(a INT); INSERT INTO t1 VALUES (1),(2); + +SELECT a FROM t1 GROUP BY 'a'; +SELECT a FROM t1 GROUP BY "a"; +SELECT a FROM t1 GROUP BY `a`; + +set sql_mode=ANSI_QUOTES; +SELECT a FROM t1 GROUP BY "a"; +SELECT a FROM t1 GROUP BY 'a'; +SELECT a FROM t1 GROUP BY `a`; +set sql_mode=''; + +SELECT a FROM t1 HAVING 'a' > 1; +SELECT a FROM t1 HAVING "a" > 1; +SELECT a FROM t1 HAVING `a` > 1; + +SELECT a FROM t1 ORDER BY 'a' DESC; +SELECT a FROM t1 ORDER BY "a" DESC; +SELECT a FROM t1 ORDER BY `a` DESC; +DROP TABLE t1; # End of 4.1 tests diff --git a/sql/sql_base.cc b/sql/sql_base.cc index 0a9529d6067..40adf5e1f15 100644 --- a/sql/sql_base.cc +++ b/sql/sql_base.cc @@ -2284,12 +2284,19 @@ find_item_in_list(Item *find, List &items, uint *counter, const char *field_name=0; const char *table_name=0; bool found_unaliased_non_uniq= 0; + /* + true if the item that we search for is a valid name reference + (and not an item that happens to have a name). + */ + bool is_ref_by_name= 0; uint unaliased_counter; LINT_INIT(unaliased_counter); *unaliased= FALSE; - if (find->type() == Item::FIELD_ITEM || find->type() == Item::REF_ITEM) + is_ref_by_name= (find->type() == Item::FIELD_ITEM || + find->type() == Item::REF_ITEM); + if (is_ref_by_name) { field_name= ((Item_ident*) find)->field_name; table_name= ((Item_ident*) find)->table_name; @@ -2401,7 +2408,7 @@ find_item_in_list(Item *find, List &items, uint *counter, } } else if (!table_name && (item->eq(find,0) || - find->name && item->name && + is_ref_by_name && find->name && item->name && !my_strcasecmp(system_charset_info, item->name,find->name))) { From a2e0059f3c1cf647b1feaf80780a917476ae4bba Mon Sep 17 00:00:00 2001 From: "svoj@mysql.com/april.(none)" <> Date: Wed, 18 Oct 2006 17:57:29 +0500 Subject: [PATCH 8/8] BUG#23175 - MYISAM crash/repair failed during repair Repair table could crash a server if there is not sufficient memory (myisam_sort_buffer_size) to operate. Affects not only repair, but also all statements that use create index by sort: repair by sort, parallel repair, bulk insert. Return an error if there is not sufficient memory to store at least one key per BUFFPEK. Also fixed memory leak if thr_find_all_keys returns an error. --- myisam/sort.c | 8 ++++++-- mysql-test/r/repair.result | 28 ++++++++++++++++++++++++++++ mysql-test/t/repair.test | 28 ++++++++++++++++++++++++++++ 3 files changed, 62 insertions(+), 2 deletions(-) diff --git a/myisam/sort.c b/myisam/sort.c index 727840709da..154d50d4d39 100644 --- a/myisam/sort.c +++ b/myisam/sort.c @@ -148,7 +148,8 @@ int _create_index_by_sort(MI_SORT_PARAM *info,my_bool no_messages, skr=maxbuffer; if (memavl < sizeof(BUFFPEK)*(uint) maxbuffer || (keys=(memavl-sizeof(BUFFPEK)*(uint) maxbuffer)/ - (sort_length+sizeof(char*))) <= 1) + (sort_length+sizeof(char*))) <= 1 || + keys < (uint) maxbuffer) { mi_check_print_error(info->sort_info->param, "sort_buffer_size is to small"); @@ -363,7 +364,8 @@ pthread_handler_decl(thr_find_all_keys,arg) skr=maxbuffer; if (memavl < sizeof(BUFFPEK)*maxbuffer || (keys=(memavl-sizeof(BUFFPEK)*maxbuffer)/ - (sort_length+sizeof(char*))) <= 1) + (sort_length+sizeof(char*))) <= 1 || + keys < (uint) maxbuffer) { mi_check_print_error(sort_param->sort_info->param, "sort_buffer_size is to small"); @@ -497,6 +499,8 @@ int thr_write_keys(MI_SORT_PARAM *sort_param) if (!sinfo->sort_keys) { got_error=1; + my_free(mi_get_rec_buff_ptr(info, sinfo->rec_buff), + MYF(MY_ALLOW_ZERO_PTR)); continue; } if (!got_error) diff --git a/mysql-test/r/repair.result b/mysql-test/r/repair.result index c069824e9f0..8b3782ec2a4 100644 --- a/mysql-test/r/repair.result +++ b/mysql-test/r/repair.result @@ -48,3 +48,31 @@ Table Non_unique Key_name Seq_in_index Column_name Collation Cardinality Sub_par t1 1 a 1 a A 5 NULL NULL YES BTREE SET myisam_repair_threads=@@global.myisam_repair_threads; DROP TABLE t1; +CREATE TABLE t1(a CHAR(255), KEY(a)); +SET myisam_sort_buffer_size=4096; +INSERT INTO t1 VALUES +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'); +SET myisam_repair_threads=2; +REPAIR TABLE t1; +Table Op Msg_type Msg_text +test.t1 repair error sort_buffer_size is to small +test.t1 repair warning Number of rows changed from 0 to 157 +test.t1 repair status OK +SET myisam_repair_threads=@@global.myisam_repair_threads; +SET myisam_sort_buffer_size=@@global.myisam_sort_buffer_size; +DROP TABLE t1; diff --git a/mysql-test/t/repair.test b/mysql-test/t/repair.test index f086d5b0c2a..b192d5c0a64 100644 --- a/mysql-test/t/repair.test +++ b/mysql-test/t/repair.test @@ -45,4 +45,32 @@ SHOW INDEX FROM t1; SET myisam_repair_threads=@@global.myisam_repair_threads; DROP TABLE t1; +# +# BUG#23175 - MYISAM crash/repair failed during repair +# +CREATE TABLE t1(a CHAR(255), KEY(a)); +SET myisam_sort_buffer_size=4096; +INSERT INTO t1 VALUES +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'),('0'), +('0'),('0'),('0'),('0'),('0'),('0'),('0'); +SET myisam_repair_threads=2; +REPAIR TABLE t1; +SET myisam_repair_threads=@@global.myisam_repair_threads; +SET myisam_sort_buffer_size=@@global.myisam_sort_buffer_size; +DROP TABLE t1; + # End of 4.1 tests