From a3a55de7181e54c32f816c7068acba2774455809 Mon Sep 17 00:00:00 2001 From: unknown Date: Tue, 19 Jun 2001 22:44:43 +0300 Subject: [PATCH 01/11] fsp0fsp.c Fix a bug in freeing of a segment in the tablespace innobase/fsp/fsp0fsp.c: Fix a bug in freeing of a segment in the tablespace --- innobase/fsp/fsp0fsp.c | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/innobase/fsp/fsp0fsp.c b/innobase/fsp/fsp0fsp.c index 095ca497ee2..82cc55dfba5 100644 --- a/innobase/fsp/fsp0fsp.c +++ b/innobase/fsp/fsp0fsp.c @@ -127,11 +127,9 @@ typedef byte fseg_inode_t; page number within space, FIL_NULL means that the slot is not in use */ /*-------------------------------------*/ -#define FSEG_INODE_SIZE (16 + 3 * FLST_BASE_NODE_SIZE +\ - FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE) +#define FSEG_INODE_SIZE (16 + 3 * FLST_BASE_NODE_SIZE + FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE) -#define FSP_SEG_INODES_PER_PAGE ((UNIV_PAGE_SIZE - FSEG_ARR_OFFSET - 10)\ - / FSEG_INODE_SIZE) +#define FSP_SEG_INODES_PER_PAGE ((UNIV_PAGE_SIZE - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE) /* Number of segment inodes which fit on a single page */ @@ -198,8 +196,7 @@ the extent are free and which contain old tuple version to clean. */ /* File extent data structure size in bytes. The "+ 7 ) / 8" part in the definition rounds the number of bytes upward. 
*/ -#define XDES_SIZE (XDES_BITMAP +\ - (FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE + 7) / 8) +#define XDES_SIZE (XDES_BITMAP + (FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE + 7) / 8) /* Offset of the descriptor array on a descriptor page */ #define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE) @@ -2633,6 +2630,14 @@ fseg_free_step( MTR_MEMO_X_LOCK)); mtr_x_lock(fil_space_get_latch(space), mtr); + descr = xdes_get_descriptor(space, buf_frame_get_page_no(header), mtr); + + /* Check that the header resides on a page which has not been + freed yet */ + + ut_a(descr); + ut_a(xdes_get_bit(descr, XDES_FREE_BIT, buf_frame_get_page_no(header) + % FSP_EXTENT_SIZE, mtr) == FALSE); inode = fseg_inode_get(header, mtr); descr = fseg_get_first_extent(inode, mtr); @@ -2647,7 +2652,6 @@ fseg_free_step( } /* Free a frag page */ - n = fseg_find_last_used_frag_page_slot(inode, mtr); if (n == ULINT_UNDEFINED) { @@ -2659,6 +2663,16 @@ fseg_free_step( fseg_free_page_low(inode, space, fseg_get_nth_frag_page_no(inode, n, mtr), mtr); + + n = fseg_find_last_used_frag_page_slot(inode, mtr); + + if (n == ULINT_UNDEFINED) { + /* Freeing completed: free the segment inode */ + fsp_free_seg_inode(space, inode, mtr); + + return(TRUE); + } + return(FALSE); } From 0481ebbe4f71ff619a20c582f2dd500ad5641f80 Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 21 Jun 2001 16:48:18 -0600 Subject: [PATCH 02/11] merged test from 4.0 mysql-test/r/drop.result: merged a test from 4.0 mysql-test/t/drop.test: updated test results --- mysql-test/r/drop.result | 2 ++ mysql-test/t/drop.test | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/mysql-test/r/drop.result b/mysql-test/r/drop.result index c7a4ca9e0d2..741fc6bba85 100644 --- a/mysql-test/r/drop.result +++ b/mysql-test/r/drop.result @@ -1,5 +1,7 @@ n 1 +n +4 Database foo mysql diff --git a/mysql-test/t/drop.test b/mysql-test/t/drop.test index 8f9aa852e8b..2a45fe8253b 100644 --- a/mysql-test/t/drop.test +++ b/mysql-test/t/drop.test @@ -10,6 +10,22 
@@ insert into t1 values(2); create table t1(n int); drop table t1; select * from t1; + +#now test for a bug in drop database - it is important that the name +#of the table is the same as the name of the database - in the original +#code this triggered a bug +drop database if exists foo; +create database foo; +drop database if exists foo; +create database foo; +create table foo.foo (n int); +insert into foo.foo values (4); +select * from foo.foo; +drop database if exists foo; +create database foo; +drop database foo; + +# test drop/create database and FLUSH TABLES WITH READ LOCK drop database if exists foo; flush tables with read lock; --error 1209 @@ -23,3 +39,5 @@ drop database foo; unlock tables; drop database foo; show databases; + + From a72c990f22ac3025de642de67b6b8805047bffdc Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 22 Jun 2001 04:51:20 -0500 Subject: [PATCH 03/11] manual.texi Updated mirror URL Docs/manual.texi: Updated mirror URL --- Docs/manual.texi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Docs/manual.texi b/Docs/manual.texi index 2193cdab4ad..9f91ab8aa0c 100644 --- a/Docs/manual.texi +++ b/Docs/manual.texi @@ -4741,7 +4741,7 @@ Please report bad or out-of-date mirrors to @email{webmaster@@mysql.com}. 
@c Added 20000925 @image{Flags/usa} USA [ValueClick, Los Angeles CA] @ @uref{http://mysql.valueclick.com/, WWW} -@uref{ftp://mysql.valueclick.com/mysql/, FTP} +@uref{ftp://mysql.valueclick.com/pub/mysql/Downloads/, FTP} @c @item @c Not ok 20000919; Non-existent (Matt) From fb70990c10964582d9c7a6472f2af7f78a69adfc Mon Sep 17 00:00:00 2001 From: unknown Date: Sat, 23 Jun 2001 22:25:47 +0300 Subject: [PATCH 04/11] btr0pcur.c Fix a bug in persistent cursor restoration: this could cause crashes especially if the buffer pool is small innobase/btr/btr0pcur.c: Fix a bug in persistent cursor restoration: this could cause crashes especially if the buffer pool is small --- innobase/btr/btr0pcur.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/innobase/btr/btr0pcur.c b/innobase/btr/btr0pcur.c index 0388785b3fe..5e625553929 100644 --- a/innobase/btr/btr0pcur.c +++ b/innobase/btr/btr0pcur.c @@ -246,6 +246,12 @@ btr_pcur_restore_position( && btr_pcur_is_on_user_rec(cursor, mtr) && (0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor)))) { + /* We have to store the NEW value for the modify clock, since + the cursor can now be on a different page! 
*/ + + cursor->modify_clock = buf_frame_get_modify_clock( + buf_frame_align( + btr_pcur_get_rec(cursor))); mem_heap_free(heap); return(TRUE); From ecb14493f23019c7b59b78e2eaeb3ad168aaca77 Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 24 Jun 2001 19:33:50 +0300 Subject: [PATCH 05/11] buf0buf.c Several bug fixes buf0flu.c Several bug fixes buf0rea.c Several bug fixes buf0lru.c Clearer error message innobase/buf/buf0lru.c: Clearer error message innobase/buf/buf0buf.c: Several bug fixes innobase/buf/buf0flu.c: Several bug fixes innobase/buf/buf0rea.c: Several bug fixes --- innobase/buf/buf0buf.c | 73 ++++++++++++++++++++++++++++++++++++++++++ innobase/buf/buf0flu.c | 6 ++-- innobase/buf/buf0lru.c | 6 ++-- innobase/buf/buf0rea.c | 26 +++++++++++---- 4 files changed, 100 insertions(+), 11 deletions(-) diff --git a/innobase/buf/buf0buf.c b/innobase/buf/buf0buf.c index 0046a3761a6..ede9e621462 100644 --- a/innobase/buf/buf0buf.c +++ b/innobase/buf/buf0buf.c @@ -241,6 +241,8 @@ buf_block_init( block->modify_clock = ut_dulint_zero; + block->file_page_was_freed = FALSE; + rw_lock_create(&(block->lock)); ut_ad(rw_lock_validate(&(block->lock))); @@ -542,6 +544,64 @@ buf_page_peek( return(FALSE); } +/************************************************************************ +Sets file_page_was_freed TRUE if the page is found in the buffer pool. +This function should be called when we free a file page and want the +debug version to check that it is not accessed any more unless +reallocated. 
*/ + +buf_block_t* +buf_page_set_file_page_was_freed( +/*=============================*/ + /* out: control block if found from page hash table, + otherwise NULL */ + ulint space, /* in: space id */ + ulint offset) /* in: page number */ +{ + buf_block_t* block; + + mutex_enter_fast(&(buf_pool->mutex)); + + block = buf_page_hash_get(space, offset); + + if (block) { + block->file_page_was_freed = TRUE; + } + + mutex_exit(&(buf_pool->mutex)); + + return(block); +} + +/************************************************************************ +Sets file_page_was_freed FALSE if the page is found in the buffer pool. +This function should be called when we free a file page and want the +debug version to check that it is not accessed any more unless +reallocated. */ + +buf_block_t* +buf_page_reset_file_page_was_freed( +/*===============================*/ + /* out: control block if found from page hash table, + otherwise NULL */ + ulint space, /* in: space id */ + ulint offset) /* in: page number */ +{ + buf_block_t* block; + + mutex_enter_fast(&(buf_pool->mutex)); + + block = buf_page_hash_get(space, offset); + + if (block) { + block->file_page_was_freed = FALSE; + } + + mutex_exit(&(buf_pool->mutex)); + + return(block); +} + /************************************************************************ This is the general function used to get access to a database page. 
*/ @@ -646,6 +706,9 @@ loop: block->accessed = TRUE; +#ifdef UNIV_DEBUG_FILE_ACCESSES + ut_a(block->file_page_was_freed == FALSE); +#endif mutex_exit(&(buf_pool->mutex)); #ifdef UNIV_DEBUG @@ -842,6 +905,9 @@ buf_page_optimistic_get_func( ut_ad(block->buf_fix_count > 0); ut_ad(block->state == BUF_BLOCK_FILE_PAGE); +#ifdef UNIV_DEBUG_FILE_ACCESSES + ut_a(block->file_page_was_freed == FALSE); +#endif if (!accessed) { /* In the case of a first access, try to apply linear read-ahead */ @@ -949,6 +1015,9 @@ buf_page_get_known_nowait( #endif ut_ad(block->buf_fix_count > 0); ut_ad(block->state == BUF_BLOCK_FILE_PAGE); +#ifdef UNIV_DEBUG_FILE_ACCESSES + ut_a(block->file_page_was_freed == FALSE); +#endif #ifdef UNIV_IBUF_DEBUG ut_a((mode == BUF_KEEP_OLD) @@ -996,6 +1065,8 @@ buf_page_init( block->n_hash_helps = 0; block->is_hashed = FALSE; + + block->file_page_was_freed = FALSE; } /************************************************************************ @@ -1126,6 +1197,8 @@ buf_page_create( #ifdef UNIV_IBUF_DEBUG ut_a(ibuf_count_get(block->space, block->offset) == 0); #endif + block->file_page_was_freed = FALSE; + /* Page can be found in buf_pool */ mutex_exit(&(buf_pool->mutex)); diff --git a/innobase/buf/buf0flu.c b/innobase/buf/buf0flu.c index 90bdde1ebc6..7129b8d20a9 100644 --- a/innobase/buf/buf0flu.c +++ b/innobase/buf/buf0flu.c @@ -182,8 +182,8 @@ buf_flush_write_complete( buf_pool->LRU_flush_ended++; } -/* printf("n pending flush %lu\n", - buf_pool->n_flush[block->flush_type]); */ + /* printf("n pending flush %lu\n", + buf_pool->n_flush[block->flush_type]); */ if ((buf_pool->n_flush[block->flush_type] == 0) && (buf_pool->init_flush[block->flush_type] == FALSE)) { @@ -421,6 +421,8 @@ buf_flush_try_neighbors( /* In simulated aio we wake up the i/o-handler threads now that we have posted a batch of writes: */ + /* printf("Flush count %lu ; Waking i/o handlers\n", count); */ + os_aio_simulated_wake_handler_threads(); return(count); diff --git a/innobase/buf/buf0lru.c 
b/innobase/buf/buf0lru.c index 4626dc2757b..142beaaaa15 100644 --- a/innobase/buf/buf0lru.c +++ b/innobase/buf/buf0lru.c @@ -260,9 +260,9 @@ loop: */ if (n_iterations > 30) { fprintf(stderr, - "Innobase: Warning: difficult to find free blocks from\n" - "Innobase: the buffer pool! Consider increasing the\n" - "Innobase: buffer pool size.\n"); + "InnoDB: Warning: difficult to find free blocks from\n" + "InnoDB: the buffer pool (%lu search iterations)! Consider\n" + "InnoDB: increasing the buffer pool size.\n", n_iterations); } } diff --git a/innobase/buf/buf0rea.c b/innobase/buf/buf0rea.c index 644dd226a0e..728bf4404b8 100644 --- a/innobase/buf/buf0rea.c +++ b/innobase/buf/buf0rea.c @@ -18,6 +18,7 @@ Created 11/5/1995 Heikki Tuuri #include "log0recv.h" #include "trx0sys.h" #include "os0file.h" +#include "srv0start.h" /* The size in blocks of the area where the random read-ahead algorithm counts the accessed pages when deciding whether to read-ahead */ @@ -132,10 +133,16 @@ buf_read_ahead_random( ulint low, high; ulint i; - if (ibuf_bitmap_page(offset)) { + if (srv_startup_is_before_trx_rollback_phase) { + /* No read-ahead to avoid thread deadlocks */ + return(0); + } - /* If it is an ibuf bitmap page, we do no read-ahead, as - that could break the ibuf page access order */ + if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) { + + /* If it is an ibuf bitmap page or trx sys hdr, we do + no read-ahead, as that could break the ibuf page access + order */ return(0); } @@ -301,9 +308,16 @@ buf_read_ahead_linear( ulint low, high; ulint i; - if (ibuf_bitmap_page(offset)) { - /* If it is an ibuf bitmap page, we do no read-ahead, as - that could break the ibuf page access order */ + if (srv_startup_is_before_trx_rollback_phase) { + /* No read-ahead to avoid thread deadlocks */ + return(0); + } + + if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) { + + /* If it is an ibuf bitmap page or trx sys hdr, we do + no read-ahead, as that could break the 
ibuf page access + order */ return(0); } From 5b6c96202b5300d92b49ec644113061b5f67d7c1 Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 24 Jun 2001 19:51:20 +0300 Subject: [PATCH 06/11] sync0sync.c Do not use in-line assembly in GCC srv0start.c Eliminate a deadlock of threads at startup row0mysql.c Several bug fixes row0umod.c Several bug fixes row0upd.c Several bug fixes os0file.c Revert back to fsync as default flush method log0recv.c Several bug fixes ibuf0ibuf.c Several bug fixes fsp0fsp.c Several bug fixes trx0undo.c Put some assertions to uncover possible bugs dict0boot.c Several bug fixes innobase/dict/dict0boot.c: Several bug fixes innobase/trx/trx0undo.c: Put some assertions to uncover possible bugs innobase/fsp/fsp0fsp.c: Several bug fixes innobase/ibuf/ibuf0ibuf.c: Several bug fixes innobase/log/log0recv.c: Several bug fixes innobase/os/os0file.c: Revert back to fsync as default flush method innobase/row/row0mysql.c: Several bug fixes innobase/row/row0umod.c: Several bug fixes innobase/row/row0upd.c: Several bug fixes innobase/srv/srv0start.c: Eliminate a deadlock of threads at startup innobase/sync/sync0sync.c: Do not use in-line assembly in GCC --- innobase/dict/dict0boot.c | 9 +++++---- innobase/fsp/fsp0fsp.c | 12 ++++++++++++ innobase/ibuf/ibuf0ibuf.c | 8 ++++++++ innobase/log/log0recv.c | 6 +++--- innobase/os/os0file.c | 6 ++++++ innobase/row/row0mysql.c | 6 +++++- innobase/row/row0umod.c | 27 ++++++++++++++++++++++---- innobase/row/row0upd.c | 30 ++++++++++++++++++++++++----- innobase/srv/srv0start.c | 20 +++++++++++++++++--- innobase/sync/sync0sync.c | 40 +++++++++++++++++++++++++++++++++++++++ innobase/trx/trx0undo.c | 10 +++++++++- 11 files changed, 153 insertions(+), 21 deletions(-) diff --git a/innobase/dict/dict0boot.c b/innobase/dict/dict0boot.c index 260e8d4c276..35fdfce16a6 100644 --- a/innobase/dict/dict0boot.c +++ b/innobase/dict/dict0boot.c @@ -313,6 +313,11 @@ dict_boot(void) mtr_commit(&mtr); /*-------------------------*/ + + /* 
Initialize the insert buffer table and index for each tablespace */ + + ibuf_init_at_db_start(); + /* Load definitions of other indexes on system tables */ dict_load_sys_table(dict_sys->sys_tables); @@ -320,10 +325,6 @@ dict_boot(void) dict_load_sys_table(dict_sys->sys_indexes); dict_load_sys_table(dict_sys->sys_fields); - /* Initialize the insert buffer table and index for each tablespace */ - - ibuf_init_at_db_start(); - mutex_exit(&(dict_sys->mutex)); } diff --git a/innobase/fsp/fsp0fsp.c b/innobase/fsp/fsp0fsp.c index 82cc55dfba5..101fb5f3ba0 100644 --- a/innobase/fsp/fsp0fsp.c +++ b/innobase/fsp/fsp0fsp.c @@ -2536,6 +2536,10 @@ fseg_free_page( seg_inode = fseg_inode_get(seg_header, mtr); fseg_free_page_low(seg_inode, space, page, mtr); + +#ifdef UNIV_DEBUG_FILE_ACCESSES + buf_page_set_file_page_was_freed(space, page); +#endif } /************************************************************************** @@ -2599,6 +2603,14 @@ fseg_free_extent( } fsp_free_extent(space, page, mtr); + +#ifdef UNIV_DEBUG_FILE_ACCESSES + for (i = 0; i < FSP_EXTENT_SIZE; i++) { + + buf_page_set_file_page_was_freed(space, + first_page_in_extent + i); + } +#endif } /************************************************************************** diff --git a/innobase/ibuf/ibuf0ibuf.c b/innobase/ibuf/ibuf0ibuf.c index 7227b54e71e..171c6169927 100644 --- a/innobase/ibuf/ibuf0ibuf.c +++ b/innobase/ibuf/ibuf0ibuf.c @@ -1382,6 +1382,9 @@ ibuf_remove_free_page( fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, space, page_no, &mtr); +#ifdef UNIV_DEBUG_FILE_ACCESSES + buf_page_reset_file_page_was_freed(space, page_no); +#endif ibuf_enter(); mutex_enter(&ibuf_mutex); @@ -1413,6 +1416,9 @@ ibuf_remove_free_page( ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF, FALSE, &mtr); +#ifdef UNIV_DEBUG_FILE_ACCESSES + buf_page_set_file_page_was_freed(space, page_no); +#endif mtr_commit(&mtr); mutex_exit(&ibuf_mutex); @@ -2431,6 +2437,8 @@ ibuf_merge_or_delete_for_page( block = 
buf_block_align(page); rw_lock_x_lock_move_ownership(&(block->lock)); + + ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX); } n_inserts = 0; diff --git a/innobase/log/log0recv.c b/innobase/log/log0recv.c index e93cd3f0364..d16085a2d6f 100644 --- a/innobase/log/log0recv.c +++ b/innobase/log/log0recv.c @@ -944,9 +944,9 @@ recv_read_in_area( } buf_read_recv_pages(FALSE, space, page_nos, n); - - /* printf("Recv pages at %lu n %lu\n", page_nos[0], n); */ - + /* + printf("Recv pages at %lu n %lu\n", page_nos[0], n); + */ return(n); } diff --git a/innobase/os/os0file.c b/innobase/os/os0file.c index fa0c266a82a..668d74d75b5 100644 --- a/innobase/os/os0file.c +++ b/innobase/os/os0file.c @@ -11,6 +11,7 @@ Created 10/21/1995 Heikki Tuuri #include "ut0mem.h" #include "srv0srv.h" +#undef HAVE_FDATASYNC #ifdef POSIX_ASYNC_IO /* We assume in this case that the OS has standard Posix aio (at least SunOS @@ -562,6 +563,11 @@ os_file_flush( return(TRUE); } + fprintf(stderr, + "InnoDB: Error: the OS said file flush did not succeed\n"); + + os_file_handle_error(file, NULL); + return(FALSE); #endif } diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c index aa5259cbaf8..8e1a584f667 100644 --- a/innobase/row/row0mysql.c +++ b/innobase/row/row0mysql.c @@ -824,7 +824,11 @@ row_create_table_for_mysql( } else { assert(err == DB_DUPLICATE_KEY); fprintf(stderr, - "Innobase: error: table %s already exists in Innobase data dictionary\n", + "InnoDB: Error: table %s already exists in InnoDB internal\n" + "InnoDB: data dictionary. Have you deleted the .frm file\n" + "InnoDB: and not used DROP TABLE? 
Have you used DROP DATABASE\n" + "InnoDB: for InnoDB tables in MySQL version <= 3.23.39?\n" + "InnoDB: See the Restrictions section of the InnoDB manual.\n", table->name); } diff --git a/innobase/row/row0umod.c b/innobase/row/row0umod.c index 70cf0fe5a32..c8db428bade 100644 --- a/innobase/row/row0umod.c +++ b/innobase/row/row0umod.c @@ -361,6 +361,7 @@ row_undo_mod_del_unmark_sec( btr_cur_t* btr_cur; ulint err; ibool found; + char* err_buf; UT_NOT_USED(node); @@ -369,13 +370,31 @@ row_undo_mod_del_unmark_sec( found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur, &mtr); - ut_a(found); + if (!found) { + err_buf = mem_alloc(1000); + dtuple_sprintf(err_buf, 900, entry); - btr_cur = btr_pcur_get_btr_cur(&pcur); + fprintf(stderr, "InnoDB: error in sec index entry del undo in\n" + "InnoDB: index %s table %s\n", index->name, + index->table->name); + fprintf(stderr, "InnoDB: tuple %s\n", err_buf); - err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG, + rec_sprintf(err_buf, 900, btr_pcur_get_rec(&pcur)); + fprintf(stderr, "InnoDB: record %s\n", err_buf); + + fprintf(stderr, "InnoDB: Make a detailed bug report and send it\n"); + fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n"); + + mem_free(err_buf); + + } else { + + btr_cur = btr_pcur_get_btr_cur(&pcur); + + err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG, btr_cur, FALSE, thr, &mtr); - ut_ad(err == DB_SUCCESS); + ut_ad(err == DB_SUCCESS); + } btr_pcur_close(&pcur); mtr_commit(&mtr); diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c index 10dd64b8b1a..5bca2a24c01 100644 --- a/innobase/row/row0upd.c +++ b/innobase/row/row0upd.c @@ -750,6 +750,7 @@ row_upd_sec_index_entry( btr_cur_t* btr_cur; mem_heap_t* heap; rec_t* rec; + char* err_buf; ulint err = DB_SUCCESS; index = node->index; @@ -764,18 +765,37 @@ row_upd_sec_index_entry( found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur, &mtr); - ut_ad(found); - btr_cur = btr_pcur_get_btr_cur(&pcur); rec = 
btr_cur_get_rec(btr_cur); - /* Delete mark the old index record; it can already be delete marked if - we return after a lock wait in row_ins_index_entry below */ + if (!found) { - if (!rec_get_deleted_flag(rec)) { + err_buf = mem_alloc(1000); + dtuple_sprintf(err_buf, 900, entry); + + fprintf(stderr, "InnoDB: error in sec index entry update in\n" + "InnoDB: index %s table %s\n", index->name, + index->table->name); + fprintf(stderr, "InnoDB: tuple %s\n", err_buf); + + rec_sprintf(err_buf, 900, rec); + fprintf(stderr, "InnoDB: record %s\n", err_buf); + + fprintf(stderr, "InnoDB: Make a detailed bug report and send it\n"); + fprintf(stderr, "InnoDB: to mysql@lists.mysql.com\n"); + + mem_free(err_buf); + } else { + + /* Delete mark the old index record; it can already be + delete marked if we return after a lock wait in + row_ins_index_entry below */ + + if (!rec_get_deleted_flag(rec)) { err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE, thr, &mtr); + } } btr_pcur_close(&pcur); diff --git a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c index b584b663e43..e121f509266 100644 --- a/innobase/srv/srv0start.c +++ b/innobase/srv/srv0start.c @@ -56,6 +56,7 @@ Created 2/16/1996 Heikki Tuuri #include "srv0start.h" #include "que0que.h" +ibool srv_startup_is_before_trx_rollback_phase = FALSE; ibool srv_is_being_started = FALSE; ibool srv_was_started = FALSE; @@ -531,6 +532,7 @@ innobase_start_or_create_for_mysql(void) /* yydebug = TRUE; */ srv_is_being_started = TRUE; + srv_startup_is_before_trx_rollback_phase = TRUE; if (0 == ut_strcmp(srv_unix_file_flush_method_str, "fdatasync")) { srv_unix_file_flush_method = SRV_UNIX_FDATASYNC; @@ -548,6 +550,8 @@ innobase_start_or_create_for_mysql(void) return(DB_ERROR); } + printf("srv_unix set to %lu\n", srv_unix_file_flush_method); + os_aio_use_native_aio = srv_use_native_aio; err = srv_boot(); @@ -728,6 +732,7 @@ innobase_start_or_create_for_mysql(void) trx_sys_create(); dict_create(); + srv_startup_is_before_trx_rollback_phase = 
FALSE; } else if (srv_archive_recovery) { fprintf(stderr, @@ -742,9 +747,15 @@ innobase_start_or_create_for_mysql(void) return(DB_ERROR); } - trx_sys_init_at_db_start(); + /* Since ibuf init is in dict_boot, and ibuf is needed + in any disk i/o, first call dict_boot */ + dict_boot(); + + trx_sys_init_at_db_start(); + srv_startup_is_before_trx_rollback_phase = FALSE; + recv_recovery_from_archive_finish(); } else { /* We always try to do a recovery, even if the database had @@ -759,12 +770,15 @@ innobase_start_or_create_for_mysql(void) return(DB_ERROR); } - trx_sys_init_at_db_start(); + /* Since ibuf init is in dict_boot, and ibuf is needed + in any disk i/o, first call dict_boot */ dict_boot(); + trx_sys_init_at_db_start(); /* The following needs trx lists which are initialized in trx_sys_init_at_db_start */ - + + srv_startup_is_before_trx_rollback_phase = FALSE; recv_recovery_from_checkpoint_finish(); } diff --git a/innobase/sync/sync0sync.c b/innobase/sync/sync0sync.c index 7153355d2a9..c3a1ac3b47f 100644 --- a/innobase/sync/sync0sync.c +++ b/innobase/sync/sync0sync.c @@ -166,6 +166,46 @@ struct sync_level_struct{ ulint level; /* level of the latch in the latching order */ }; + +#if defined(__GNUC__) && defined(UNIV_INTEL_X86) + +ulint +sync_gnuc_intelx86_test_and_set( + /* out: old value of the lock word */ + ulint* lw) /* in: pointer to the lock word */ +{ + ulint res; + + /* In assembly we use the so-called AT & T syntax where + the order of operands is inverted compared to the ordinary Intel + syntax. The 'l' after the mnemonics denotes a 32-bit operation. + The line after the code tells which values come out of the asm + code, and the second line tells the input to the asm code. 
*/ + + asm volatile("movl $1, %%eax; xchgl (%%ecx), %%eax" : + "=eax" (res), "=m" (*lw) : + "ecx" (lw)); + return(res); +} + +void +sync_gnuc_intelx86_reset( + ulint* lw) /* in: pointer to the lock word */ +{ + /* In assembly we use the so-called AT & T syntax where + the order of operands is inverted compared to the ordinary Intel + syntax. The 'l' after the mnemonics denotes a 32-bit operation. */ + + asm volatile("movl $0, %%eax; xchgl (%%ecx), %%eax" : + "=m" (*lw) : + "ecx" (lw) : + "eax"); /* gcc does not seem to understand + that our asm code resets eax: tell it + explicitly that after the third ':' */ +} + +#endif + /********************************************************************** Creates, or rather, initializes a mutex object in a specified memory location (which must be appropriately aligned). The mutex is initialized diff --git a/innobase/trx/trx0undo.c b/innobase/trx/trx0undo.c index efee02c4cad..1f408428582 100644 --- a/innobase/trx/trx0undo.c +++ b/innobase/trx/trx0undo.c @@ -613,6 +613,10 @@ trx_undo_insert_header_reuse( /* Insert undo data is not needed after commit: we may free all the space on the page */ + ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_TYPE) + == TRX_UNDO_INSERT); + mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free); mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free); @@ -800,7 +804,7 @@ trx_undo_free_page( ulint hist_size; UT_NOT_USED(hdr_offset); - ut_ad(hdr_page_no != page_no); + ut_a(hdr_page_no != page_no); ut_ad(!mutex_own(&kernel_mutex)); ut_ad(mutex_own(&(rseg->mutex))); @@ -1411,6 +1415,10 @@ trx_undo_reuse_cached( if (type == TRX_UNDO_INSERT) { offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr); } else { + ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_TYPE) + == TRX_UNDO_UPDATE); + offset = trx_undo_header_create(undo_page, trx_id, mtr); } From 3b5cee2e49f25b078120e7a8e1b13f11c3716504 Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 24 Jun 
2001 20:09:41 +0300 Subject: [PATCH 07/11] univ.i Added a new debug define option sync0sync.ic Do not use GCC in-line assembly srv0start.h Eliminate a deadlock of threads at startup buf0buf.h Add some debug functions srv0start.c Remove a printf innobase/srv/srv0start.c: Remove a printf innobase/include/buf0buf.h: Add some debug functions innobase/include/srv0start.h: Eliminate a deadlock of threads at startup innobase/include/sync0sync.ic: Do not use GCC in-line assembly innobase/include/univ.i: Added a new debug define option --- innobase/include/buf0buf.h | 29 +++++++++++++++++++++++++++++ innobase/include/srv0start.h | 3 +++ innobase/include/sync0sync.ic | 4 ++-- innobase/include/univ.i | 2 ++ innobase/srv/srv0start.c | 3 ++- 5 files changed, 38 insertions(+), 3 deletions(-) diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h index 5e90f5952fc..7f3e20a4505 100644 --- a/innobase/include/buf0buf.h +++ b/innobase/include/buf0buf.h @@ -293,6 +293,32 @@ buf_page_peek_block( ulint space, /* in: space id */ ulint offset);/* in: page number */ /************************************************************************ +Sets file_page_was_freed TRUE if the page is found in the buffer pool. +This function should be called when we free a file page and want the +debug version to check that it is not accessed any more unless +reallocated. */ + +buf_block_t* +buf_page_set_file_page_was_freed( +/*=============================*/ + /* out: control block if found from page hash table, + otherwise NULL */ + ulint space, /* in: space id */ + ulint offset); /* in: page number */ +/************************************************************************ +Sets file_page_was_freed FALSE if the page is found in the buffer pool. +This function should be called when we free a file page and want the +debug version to check that it is not accessed any more unless +reallocated. 
*/ + +buf_block_t* +buf_page_reset_file_page_was_freed( +/*===============================*/ + /* out: control block if found from page hash table, + otherwise NULL */ + ulint space, /* in: space id */ + ulint offset); /* in: page number */ +/************************************************************************ Recommends a move of a block to the start of the LRU list if there is danger of dropping from the buffer pool. NOTE: does not reserve the buffer pool mutex. */ @@ -706,6 +732,9 @@ struct buf_block_struct{ which bufferfixes the block acquires an s-latch here; so we can use the debug utilities in sync0rw */ + ibool file_page_was_freed; + /* this is set to TRUE when fsp + frees a page in buffer pool */ }; /* The buffer pool structure. NOTE! The definition appears here only for diff --git a/innobase/include/srv0start.h b/innobase/include/srv0start.h index 66eeb4f2e3c..e2b20f3b5fc 100644 --- a/innobase/include/srv0start.h +++ b/innobase/include/srv0start.h @@ -28,4 +28,7 @@ int innobase_shutdown_for_mysql(void); /*=============================*/ /* out: DB_SUCCESS or error code */ + +extern ibool srv_startup_is_before_trx_rollback_phase; + #endif diff --git a/innobase/include/sync0sync.ic b/innobase/include/sync0sync.ic index b58d024bf6c..f7b341cb386 100644 --- a/innobase/include/sync0sync.ic +++ b/innobase/include/sync0sync.ic @@ -86,7 +86,7 @@ mutex_test_and_set( /* mutex_fence(); */ return(res); -#elif defined(__GNUC__) && defined(UNIV_INTEL_X86) +#elif defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86) ulint* lw; ulint res; @@ -134,7 +134,7 @@ mutex_reset_lock_word( __asm MOV EDX, 0 __asm MOV ECX, lw __asm XCHG EDX, DWORD PTR [ECX] -#elif defined(__GNUC__) && defined(UNIV_INTEL_X86) +#elif defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86) ulint* lw; lw = &(mutex->lock_word); diff --git a/innobase/include/univ.i b/innobase/include/univ.i index 9ded084d044..73bf48b1bc0 100644 --- a/innobase/include/univ.i +++ 
b/innobase/include/univ.i @@ -74,6 +74,8 @@ subdirectory of 'mysql'. */ #define UNIV_SYNC_PERF_STAT #define UNIV_SEARCH_PERF_STAT + +#define UNIV_DEBUG_FILE_ACCESSES */ #define UNIV_LIGHT_MEM_DEBUG diff --git a/innobase/srv/srv0start.c b/innobase/srv/srv0start.c index e121f509266..80fafa37adf 100644 --- a/innobase/srv/srv0start.c +++ b/innobase/srv/srv0start.c @@ -550,8 +550,9 @@ innobase_start_or_create_for_mysql(void) return(DB_ERROR); } + /* printf("srv_unix set to %lu\n", srv_unix_file_flush_method); - + */ os_aio_use_native_aio = srv_use_native_aio; err = srv_boot(); From 5b47d101e57b2f8bbac85041fa3b41937023b9dc Mon Sep 17 00:00:00 2001 From: unknown Date: Sun, 24 Jun 2001 20:45:57 +0300 Subject: [PATCH 08/11] mysqld.cc Put OPT_INNODB_UNIX_FILE_FLUSH_METHOD to mysqld.cc enum type sql/mysqld.cc: Put OPT_INNODB_UNIX_FILE_FLUSH_METHOD to mysqld.cc enum type --- sql/mysqld.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql/mysqld.cc b/sql/mysqld.cc index e11da82f43f..991b0e73c51 100644 --- a/sql/mysqld.cc +++ b/sql/mysqld.cc @@ -2455,7 +2455,7 @@ enum options { OPT_INNODB_LOG_ARCH_DIR, OPT_INNODB_LOG_ARCHIVE, OPT_INNODB_FLUSH_LOG_AT_TRX_COMMIT, - OPT_innodb_flush_method, + OPT_INNODB_UNIX_FILE_FLUSH_METHOD, OPT_SAFE_SHOW_DB, OPT_GEMINI_SKIP, OPT_INNODB_SKIP, OPT_TEMP_POOL, OPT_TX_ISOLATION, From 91239689e55a7cf8cc245f8311ed6e59a4d195b1 Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 27 Jun 2001 18:21:42 -0500 Subject: [PATCH 09/11] manual.texi add LIKE example illustrating case sensitivity. Docs/manual.texi: add LIKE example illustrating case sensitivity. 
BitKeeper/etc/logging_ok: Logging to logging@openlogging.org accepted --- BitKeeper/etc/logging_ok | 1 + Docs/manual.texi | 10 ++++++++++ 2 files changed, 11 insertions(+) diff --git a/BitKeeper/etc/logging_ok b/BitKeeper/etc/logging_ok index 83d7b6d611c..7dc05f47709 100644 --- a/BitKeeper/etc/logging_ok +++ b/BitKeeper/etc/logging_ok @@ -6,3 +6,4 @@ paul@teton.kitebird.com monty@hundin.mysql.fi sasha@mysql.sashanet.com monty@work.mysql.com +paul@central.snake.net diff --git a/Docs/manual.texi b/Docs/manual.texi index 9f91ab8aa0c..4e6206832a8 100644 --- a/Docs/manual.texi +++ b/Docs/manual.texi @@ -16690,6 +16690,16 @@ mysql> select 'David_' LIKE 'David|_' ESCAPE '|'; -> 1 @end example +The following two statements illustrate that string comparisons are +case insensitive unless one of the operands is a binary string: + +@example +mysql> select 'abc' LIKE 'ABC'; + -> 1 +mysql> SELECT 'abc' LIKE BINARY 'ABC'; + -> 0 +@end example + @code{LIKE} is allowed on numeric expressions! (This is a @strong{MySQL} extension to the ANSI SQL @code{LIKE}.) From 93edcc74ef6d4c76cd07a31f908667804ff4164e Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 28 Jun 2001 10:58:04 +0300 Subject: [PATCH 10/11] Cleanup myisam/myisamchk.c: --force now implies --update sql/share/italian/errmsg.txt: Update of error messages --- Docs/manual.texi | 2421 +++++++++++++++++----------------- myisam/myisamchk.c | 7 +- sql/share/italian/errmsg.txt | 20 +- 3 files changed, 1245 insertions(+), 1203 deletions(-) diff --git a/Docs/manual.texi b/Docs/manual.texi index 9f91ab8aa0c..6843438b416 100644 --- a/Docs/manual.texi +++ b/Docs/manual.texi @@ -495,9 +495,9 @@ MySQL Table Types * MERGE:: MERGE tables * ISAM:: ISAM tables * HEAP:: HEAP tables +* InnoDB:: InnoDB tables * BDB:: BDB or Berkeley_db tables * GEMINI:: GEMINI tables -* InnoDB:: InnoDB tables MyISAM Tables @@ -516,38 +516,6 @@ MyISAM table problems. 
* Corrupted MyISAM tables:: * MyISAM table close:: -BDB or Berkeley_DB Tables - -* BDB overview:: Overview of BDB Tables -* BDB install:: Installing BDB -* BDB start:: BDB startup options -* BDB characteristic:: Some characteristic of @code{BDB} tables: -* BDB TODO:: Some things we need to fix for BDB in the near future: -* BDB portability:: Operating systems supported by @strong{BDB} -* BDB errors:: Errors You May Get When Using BDB Tables - -GEMINI Tables - -* GEMINI Overview:: -* Using GEMINI Tables:: - -GEMINI Overview - -* GEMINI Features:: -* GEMINI Concepts:: -* GEMINI Limitations:: - -Using GEMINI Tables - -* Startup Options:: -* Creating GEMINI Tables:: -* Backing Up GEMINI Tables:: -* Restoring GEMINI Tables:: -* Using Auto_Increment Columns With GEMINI Tables:: -* Performance Considerations:: -* Sample Configurations:: -* When To Use GEMINI Tables:: - InnoDB Tables * InnoDB overview:: InnoDB tables overview @@ -594,6 +562,38 @@ File space management and disk i/o * InnoDB File space:: * InnoDB File Defragmenting:: +BDB or Berkeley_DB Tables + +* BDB overview:: Overview of BDB Tables +* BDB install:: Installing BDB +* BDB start:: BDB startup options +* BDB characteristic:: Some characteristic of @code{BDB} tables: +* BDB TODO:: Some things we need to fix for BDB in the near future: +* BDB portability:: Operating systems supported by @strong{BDB} +* BDB errors:: Errors You May Get When Using BDB Tables + +GEMINI Tables + +* GEMINI Overview:: +* Using GEMINI Tables:: + +GEMINI Overview + +* GEMINI Features:: +* GEMINI Concepts:: +* GEMINI Limitations:: + +Using GEMINI Tables + +* Startup Options:: +* Creating GEMINI Tables:: +* Backing Up GEMINI Tables:: +* Restoring GEMINI Tables:: +* Using Auto_Increment Columns With GEMINI Tables:: +* Performance Considerations:: +* Sample Configurations:: +* When To Use GEMINI Tables:: + MySQL Tutorial * Connecting-disconnecting:: Connecting to and disconnecting from the server @@ -2289,8 +2289,9 @@ The Berkeley DB code 
is very stable, but we are still improving the interface between @strong{MySQL} and BDB tables, so it will take some time before this is as tested as the other table types. -@item InnoDB Tables -- Alpha -This is a very recent addition to @code{MySQL} and is not very tested yet. +@item InnoDB Tables -- Beta +This is a recent addition to @code{MySQL}. They appear to work well and +can be used after some initial testing. @item Automatic recovery of MyISAM tables - Beta This only affects the new code that checks if the table was closed properly @@ -6439,6 +6440,7 @@ shell> CXXFLAGS=-DDONT_USE_DEFAULT_FIELDS ./configure @cindex character sets @findex configure option, --with-charset +@findex configure option, --with-extra-charset @item By default, @strong{MySQL} uses the ISO-8859-1 (Latin1) character set. To change the default set, use the @code{--with-charset} option: @@ -6464,6 +6466,13 @@ indexes may be sorted incorrectly otherwise. (This can happen if you install @strong{MySQL}, create some tables, then reconfigure @strong{MySQL} to use a different character set and reinstall it.) +With the option @code{--with-extra-charset=LIST} you can define +which additional character sets should be compiled into the server. + +Here @code{LIST} is either a list of character sets separated by spaces, +@code{complex} to include all character sets that can't be dynamically loaded +or @code{all} to include all character sets into the binaries. + @item To configure @strong{MySQL} with debugging code, use the @code{--with-debug} option: @@ -6483,8 +6492,8 @@ applications. @xref{Thread-safe clients}. @item Options that pertain to particular systems can be found in the -system-specific sections later in this chapter. -@xref{Source install system issues}. +system-specific sections later in this chapter. @xref{Source install +system issues}. @end itemize @node Installing source tree, Compilation problems, Installing source, Installing @@ -7327,6 +7336,10 @@ with @code{--static}. 
If you try to do so, you will get the error: @example ld: fatal: library -ldl: not found + +or + +undefined reference to `dlopen' @end example If too many processes try to connect very rapidly to @code{mysqld}, you will @@ -8252,6 +8265,17 @@ CC=gcc CXX=gcc CXXFLAGS=-O3 \ ./configure --prefix=/usr/local/mysql --with-thread-safe-client --with-named-thread-libs=-lpthread @end example +On Irix 6.5.11 with native Irix C and C++ compilers ver. 7.3.1.2, the +following is reported to work + +@example +CC=cc CXX=CC CFLAGS='-O3 -n32 -TARG:platform=IP22 -I/usr/local/include \ +-L/usr/local/lib' CXXFLAGS='-O3 -n32 -TARG:platform=IP22 \ +-I/usr/local/include -L/usr/local/lib' ./configure --prefix=/usr/local/mysql \ +--with-berkeley-db --with-innodb \ +--with-libwrap=/usr/local --with-named-curses-libs=/usr/local/lib/libncurses.a +@end example + @node FreeBSD, NetBSD, SGI-Irix, Source install system issues @subsection FreeBSD Notes @@ -9500,11 +9524,6 @@ it would be nice if you could also add default options on the command line. For the moment, the workaround is to list the parameters in the @file{C:\my.cnf} file instead. -@item -When you suspend a laptop running Win95, the @code{mysqld} daemon doesn't -accept new connections when the laptop is resumed. We don't know if this -is a problem with Win95, TCP/IP, or @strong{MySQL}. - @item It would be real nice to be able to kill @code{mysqld} from the task manager. For the moment, you must use @code{mysqladmin shutdown}. @@ -10191,15 +10210,15 @@ library and for which @strong{MySQL} must be configured to use MIT-pthreads. If you can't get @code{mysqld} to start you can try to make a trace file to find the problem. @xref{Making trace files}. +If you are using InnoDB tables, refer to the InnoDB-specific startup +options. @xref{InnoDB start}. + If you are using BDB (Berkeley DB) tables, you should familiarize yourself with the different BDB specific startup options. @xref{BDB start}. 
If you are using Gemini tables, refer to the Gemini-specific startup options. @xref{Using GEMINI Tables}. -If you are using InnoDB tables, refer to the InnoDB-specific startup -options. @xref{InnoDB start}. - @node Automatic start, Command-line options, Starting server, Post-installation @subsection Starting and Stopping MySQL Automatically @cindex starting, the server automatically @@ -20658,9 +20677,9 @@ Version 3.22.15. It is a @strong{MySQL} extension to ANSI SQL92. @code{INSERT DELAYED} only works with @code{ISAM} and @code{MyISAM} tables. Note that as @code{MyISAM} tables supports concurrent -@code{SELECT} and @code{INSERT}, if there is no empty blocks in the data -file, you very seldom need to use @code{INSERT DELAYED} with -@code{MyISAM}. +@code{SELECT} and @code{INSERT}, if there are no free blocks in the +middle of the data file, you very seldom need to use @code{INSERT +DELAYED} with @code{MyISAM}. @xref{MyISAM}. When you use @code{INSERT DELAYED}, the client will get an OK at once and the row will be inserted when the table is not in use by any other thread. @@ -23708,9 +23727,9 @@ of both worlds. * MERGE:: MERGE tables * ISAM:: ISAM tables * HEAP:: HEAP tables +* InnoDB:: InnoDB tables * BDB:: BDB or Berkeley_db tables * GEMINI:: GEMINI tables -* InnoDB:: InnoDB tables @end menu @node MyISAM, MERGE, Table types, Table types @@ -23734,8 +23753,12 @@ the table was closed correctly. If @code{mysqld} is started with @code{--myisam-recover}, @code{MyISAM} tables will automatically be checked and/or repaired on open if the table wasn't closed properly. @item -You can @code{INSERT} new rows in a table without deleted rows, -while other threads are reading from the table. +You can @code{INSERT} new rows in a table that doesn't have free blocks +in the middle of the data file, at the same time other threads are +reading from the table (concurrent insert). 
A free block can come from +an update of a dynamic length row with much data to a row with less data +or when deleting rows. When all free blocks are used up, all future +inserts will be concurrent again. @item Support for big files (63-bit) on filesystems/operating systems that support big files. @@ -24461,1137 +24484,6 @@ SUM_OVER_ALL_KEYS(max_length_of_key + sizeof(char*) * 2) @code{sizeof(char*)} is 4 on 32-bit machines and 8 on 64-bit machines. -@cindex tables, @code{BDB} -@cindex tables, @code{Berkeley DB} -@node BDB, GEMINI, HEAP, Table types -@section BDB or Berkeley_DB Tables - -@menu -* BDB overview:: Overview of BDB Tables -* BDB install:: Installing BDB -* BDB start:: BDB startup options -* BDB characteristic:: Some characteristic of @code{BDB} tables: -* BDB TODO:: Some things we need to fix for BDB in the near future: -* BDB portability:: Operating systems supported by @strong{BDB} -* BDB errors:: Errors You May Get When Using BDB Tables -@end menu - -@node BDB overview, BDB install, BDB, BDB -@subsection Overview of BDB Tables - -Support for BDB tables is included in the @strong{MySQL} source distribution -starting from Version 3.23.34 and is activated in the @strong{MySQL}-Max -binary. - -BerkeleyDB, available at @uref{http://www.sleepycat.com/} has provided -@strong{MySQL} with a transactional table handler. By using BerkeleyDB -tables, your tables may have a greater chance of surviving crashes, and also -provides @code{COMMIT} and @code{ROLLBACK} on transactions. The -@strong{MySQL} source distribution comes with a BDB distribution that has a -couple of small patches to make it work more smoothly with @strong{MySQL}. -You can't use a non-patched @code{BDB} version with @strong{MySQL}. - -We at @strong{MySQL AB} are working in close cooperation with Sleepycat to -keep the quality of the @strong{MySQL}/BDB interface high. 
- -When it comes to supporting BDB tables, we are committed to help our -users to locate the problem and help creating a reproducable test case -for any problems involving BDB tables. Any such test case will be -forwarded to Sleepycat who in turn will help us find and fix the -problem. As this is a two stage operation, any problems with BDB tables -may take a little longer for us to fix than for other table handlers. -However, as the BerkeleyDB code itself has been used by many other -applications than @strong{MySQL}, we don't envision any big problems with -this. @xref{Table handler support}. - -@node BDB install, BDB start, BDB overview, BDB -@subsection Installing BDB - -If you have downloaded a binary version of @strong{MySQL} that includes -support for BerkeleyDB, simply follow the instructions for installing a -binary version of @strong{MySQL}. -@xref{Installing binary}. @xref{mysqld-max, , @code{mysqld-max}}. - -To compile @strong{MySQL} with Berkeley DB support, download @strong{MySQL} -Version 3.23.34 or newer and configure @code{MySQL} with the -@code{--with-berkeley-db} option. @xref{Installing source}. - -@example -cd /path/to/source/of/mysql-3.23.34 -./configure --with-berkeley-db -@end example - -Please refer to the manual provided with the @code{BDB} distribution for -more updated information. - -Even though Berkeley DB is in itself very tested and reliable, -the @strong{MySQL} interface is still considered beta quality. -We are actively improving and optimizing it to get it stable very -soon. - -@node BDB start, BDB characteristic, BDB install, BDB -@subsection BDB startup options - -If you are running with @code{AUTOCOMMIT=0} then your changes in @code{BDB} -tables will not be updated until you execute @code{COMMIT}. Instead of commit -you can execute @code{ROLLBACK} to forget your changes. @xref{COMMIT}. - -If you are running with @code{AUTOCOMMIT=1} (the default), your changes -will be committed immediately. 
You can start an extended transaction with -the @code{BEGIN WORK} SQL command, after which your changes will not be -committed until you execute @code{COMMIT} (or decide to @code{ROLLBACK} -the changes). - -The following options to @code{mysqld} can be used to change the behavior of -BDB tables: - -@multitable @columnfractions .30 .70 -@item @strong{Option} @tab @strong{Meaning} -@item @code{--bdb-home=directory} @tab Base directory for BDB tables. This should be the same directory you use for --datadir. -@item @code{--bdb-lock-detect=#} @tab Berkeley lock detect. One of (DEFAULT, OLDEST, RANDOM, or YOUNGEST). -@item @code{--bdb-logdir=directory} @tab Berkeley DB log file directory. -@item @code{--bdb-no-sync} @tab Don't synchronously flush logs. -@item @code{--bdb-no-recover} @tab Don't start Berkeley DB in recover mode. -@item @code{--bdb-shared-data} @tab Start Berkeley DB in multi-process mode (Don't use @code{DB_PRIVATE} when initializing Berkeley DB) -@item @code{--bdb-tmpdir=directory} @tab Berkeley DB tempfile name. -@item @code{--skip-bdb} @tab Don't use berkeley db. -@item @code{-O bdb_max_lock=1000} @tab Set the maximum number of locks possible. @xref{SHOW VARIABLES}. -@end multitable - -If you use @code{--skip-bdb}, @strong{MySQL} will not initialize the -Berkeley DB library and this will save a lot of memory. Of course, -you cannot use @code{BDB} tables if you are using this option. - -Normally you should start @code{mysqld} without @code{--bdb-no-recover} if you -intend to use BDB tables. This may, however, give you problems when you -try to start @code{mysqld} if the BDB log files are corrupted. @xref{Starting -server}. - -With @code{bdb_max_lock} you can specify the maximum number of locks -(10000 by default) you can have active on a BDB table. 
You should -increase this if you get errors of type @code{bdb: Lock table is out of -available locks} or @code{Got error 12 from ...} when you have do long -transactions or when @code{mysqld} has to examine a lot of rows to -calculate the query. - -You may also want to change @code{binlog_cache_size} and -@code{max_binlog_cache_size} if you are using big multi-line transactions. -@xref{COMMIT}. - -@node BDB characteristic, BDB TODO, BDB start, BDB -@subsection Some characteristic of @code{BDB} tables: - -@itemize @bullet -@item -To be able to rollback transactions BDB maintain log files. For maximum -performance you should place these on another disk than your databases -by using the @code{--bdb_log_dir} options. -@item -@strong{MySQL} performs a checkpoint each time a new BDB log -file is started, and removes any log files that are not needed for -current transactions. One can also run @code{FLUSH LOGS} at any time -to checkpoint the Berkeley DB tables. - -For disaster recovery, one should use table backups plus -@strong{MySQL}'s binary log. @xref{Backup}. - -@strong{Warning}: If you delete old log files that are in use, BDB will -not be able to do recovery at all and you may loose data if something -goes wrong. -@item -@strong{MySQL} requires a @code{PRIMARY KEY} in each BDB table to be -able to refer to previously read rows. If you don't create one, -@strong{MySQL} will create an maintain a hidden @code{PRIMARY KEY} for -you. The hidden key has a length of 5 bytes and is incremented for each -insert attempt. -@item -If all columns you access in a @code{BDB} table are part of the same index or -part of the primary key, then @strong{MySQL} can execute the query -without having to access the actual row. In a @code{MyISAM} table the -above holds only if the columns are part of the same index. -@item -The @code{PRIMARY KEY} will be faster than any other key, as the -@code{PRIMARY KEY} is stored together with the row data. 
As the other keys are -stored as the key data + the @code{PRIMARY KEY}, it's important to keep the -@code{PRIMARY KEY} as short as possible to save disk and get better speed. -@item -@code{LOCK TABLES} works on @code{BDB} tables as with other tables. If -you don't use @code{LOCK TABLE}, @strong{MYSQL} will issue an internal -multiple-write lock on the table to ensure that the table will be -properly locked if another thread issues a table lock. -@item -Internal locking in @code{BDB} tables is done on page level. -@item -@code{SELECT COUNT(*) FROM table_name} is slow as @code{BDB} tables doesn't -maintain a count of the number of rows in the table. -@item -Scanning is slower than with @code{MyISAM} tables as one has data in BDB -tables stored in B-trees and not in a separate data file. -@item -The application must always be prepared to handle cases where -any change of a @code{BDB} table may make an automatic rollback and any -read may fail with a deadlock error. -@item -Keys are not compressed to previous keys as with ISAM or MyISAM -tables. In other words, the key information will take a little more -space in @code{BDB} tables compared to MyISAM tables which don't use -@code{PACK_KEYS=0}. -@item -There is often holes in the BDB table to allow you to insert new rows in -the middle of the key tree. This makes BDB tables somewhat larger than -MyISAM tables. -@item -The optimizer needs to know an approximation of the number of rows in -the table. @strong{MySQL} solves this by counting inserts and -maintaining this in a separate segment in each BDB table. If you don't -do a lot of @code{DELETE} or @code{ROLLBACK}:s this number should be -accurate enough for the @strong{MySQL} optimizer, but as @strong{MySQL} -only store the number on close, it may be wrong if @strong{MySQL} dies -unexpectedly. It should not be fatal even if this number is not 100 % -correct. One can update the number of rows by executing @code{ANALYZE -TABLE} or @code{OPTIMIZE TABLE}. 
@xref{ANALYZE TABLE} . @xref{OPTIMIZE -TABLE}. -@item -If you get full disk with a @code{BDB} table, you will get an error -(probably error 28) and the transaction should roll back. This is in -contrast with @code{MyISAM} and @code{ISAM} tables where @code{mysqld} will -wait for enough free disk before continuing. -@end itemize - -@node BDB TODO, BDB portability, BDB characteristic, BDB -@subsection Some things we need to fix for BDB in the near future: - -@itemize @bullet -@item -It's very slow to open many BDB tables at the same time. If you are -going to use BDB tables, you should not have a very big table cache (> -256 ?) and you should use @code{--no-auto-rehash} with the @code{mysql} -client. We plan to partly fix this in 4.0. -@item -@code{SHOW TABLE STATUS} doesn't yet provide that much information for BDB -tables. -@item -Optimize performance. -@item -Change to not use page locks at all when we are scanning tables. -@end itemize - -@node BDB portability, BDB errors, BDB TODO, BDB -@subsection Operating systems supported by @strong{BDB} - -If you after having built @strong{MySQL} with support for BDB tables get -the following error in the log file when you start @code{mysqld}: - -@example -bdb: architecture lacks fast mutexes: applications cannot be threaded -Can't init dtabases -@end example - -This means that @code{BDB} tables are not supported for your architecture. -In this case you have to rebuild @strong{MySQL} without BDB table support. - -NOTE: The following list is not complete; We will update this as we get -more information about this. - -Currently we know that BDB tables works with the following operating -system. 
- -@itemize @bullet -@item -Linux 2.x intel -@item -Solaris sparc -@item -SCO OpenServer -@item -SCO UnixWare 7.0.1 -@end itemize - -It doesn't work with the following operating systems: - -@itemize @bullet -@item -Linux 2.x Alpha -@item -Max OS X -@end itemize - -@node BDB errors, , BDB portability, BDB -@subsection Errors You May Get When Using BDB Tables - -@itemize @bullet -@item -If you get the following error in the @code{hostname.err log} when -starting @code{mysqld}: - -@example -bdb: Ignoring log file: .../log.XXXXXXXXXX: unsupported log version # -@end example -it means that the new @code{BDB} version doesn't support the old log -file format. In this case you have to delete all @code{BDB} log BDB -from your database directory (the files that has the format -@code{log.XXXXXXXXXX} ) and restart @code{mysqld}. We would also -recommend you to do a @code{mysqldump --opt} of your old @code{BDB} -tables, delete the old table and restore the dump. -@item -If you are running in not @code{auto_commit} mode and delete a table you -are using by another thread you may get the following error messages in -the @strong{MySQL} error file: - -@example -001119 23:43:56 bdb: Missing log fileid entry -001119 23:43:56 bdb: txn_abort: Log undo failed for LSN: 1 3644744: Invalid -@end example - -This is not fatal but we don't recommend that you delete tables if you are -not in @code{auto_commit} mode, until this problem is fixed (the fix is -not trivial). -@end itemize - -@cindex GEMINI tables -@node GEMINI, InnoDB, BDB, Table types -@section GEMINI Tables - -@cindex GEMINI tables, overview -@menu -* GEMINI Overview:: -* Using GEMINI Tables:: -@end menu - -@node GEMINI Overview, Using GEMINI Tables, GEMINI, GEMINI -@subsection GEMINI Overview - -@code{GEMINI} is a transaction-safe table handler for @strong{MySQL}. It -provides row-level locking, robust transaction support and reliable -crash recovery. 
It is targeted for databases that need to handle heavy -multi-user updates typical of transaction processing applications while -still providing excellent performance for read-intensive operations. The -@code{GEMINI} table type is developed and supported by NuSphere -Corporation (see @url{http://www.nusphere.com}). - -@code{GEMINI} provides full ACID transaction properties (Atomic, -Consistent, Independent, and Durable) with a programming model that -includes support for statement atomicity and all four standard isolation -levels (Read Uncommitted, Read Committed, Repeatable Read, and -Serializable) defined in the SQL standard. - -The @code{GEMINI} tables support row-level and table-level locking to -increase concurrency in applications and allow reading of tables without -locking for maximum concurrency in a heavy update environment. The -transaction, locking, and recovery mechanisms are tightly integrated to -eliminate unnecessary administration overhead. - -In general, if @code{GEMINI} tables are selected for an application, it -is recommended that all tables updated in the application be -@code{GEMINI} tables to provide well-defined system behavior. If -non-@code{GEMINI} tables are mixed into the application then, ACID -transaction properties cannot be maintained. While there are clearly -cases where mixing table types is appropriate, it should always be done -with careful consideration of the impact on transaction consistency and -recoverability needs of the application and underlying database. - -The @code{GEMINI} table type is derived from a successful commercial -database and uses the storage kernel technology tightly integrated with -@strong{MySQL} server. The basic @code{GEMINI} technology is in use by -millions of users worldwide in production environments today. This -maturity allows @code{GEMINI} tables to provide a solution for those -users who require transaction-based behavior as part of their -applications. 
- -The @code{GEMINI} table handler supports a configurable data cache that -allows a significant portion of any database to be maintained in memory -while still allowing durable updates. - -@cindex GEMINI tables, features -@menu -* GEMINI Features:: -* GEMINI Concepts:: -* GEMINI Limitations:: -@end menu - -@node GEMINI Features, GEMINI Concepts, GEMINI Overview, GEMINI Overview -@subsubsection GEMINI Features - -The following summarizes the major features provided by @code{GEMINI} -tables. - -@itemize @bullet -@item -Supports all optimization statistics used by the @strong{MySQL} optimizer -including table cardinality, index range estimates and multi-component -selectivity to insure optimal query performance. - -@item -Maintains exact cardinality information for each table so @code{SELECT -COUNT(*) FROM} table-name always returns an answer immediately. - -@item -Supports index-only queries; when index data is sufficient to resolve a -query no record data is read (for non character types). - -@item -@code{GEMINI} uses block based I/O for better performance. There is no -performance penalty for using @code{VARCHAR} fields. The maximum record size is -currently 32K. - -@item -The number of rows in a single @code{GEMINI} table can be 4 quintillion -(full use of 64 bits). - -@item -Individual tables can be as large as 16 petabytes. - -@item -Locking is done at a record or row level rather than at table level -unless table locks are explicitly requested. When a row is inserted into -a table, other rows can be updated, inserted or deleted without waiting -for the inserted row to be committed. - -@item -Provides durable transactions backed by a crash recovery mechanism that -returns the database to a known consistent state in the event of an -unexpected failure. - -@item -Support for all isolation levels and statement atomicity defined in the -SQL standard. 
- -@item -Reliable Master Replication; the master database can survive system -failure and recover all committed transactions. -@end itemize - -@cindex GEMINI tables, concepts -@node GEMINI Concepts, GEMINI Limitations, GEMINI Features, GEMINI Overview -@subsubsection GEMINI Concepts - -This section highlights some of the important concepts behind -@code{GEMINI} and the @code{GEMINI} programming model, including: - -@itemize @bullet -@item -ACID Transactions -@item -Transaction COMMIT/ROLLBACK -@item -Statement Atomicity -@item -Recovery -@item -Isolation Levels -@item -Row-Level Locking -@end itemize - -These features are described below. - -@cindex GEMINI tables, ACID transactions -@noindent -@strong{ACID Transactions} - -ACID in the context of transactions is an acronym which stands for -@emph{Atomicity}, @emph{Consistency}, @emph{Isolation}, @emph{Durability}. - -@multitable @columnfractions .25 .75 -@item @sc{Attribute} @tab @sc{Description} -@item -@strong{Atomicity} -@tab A transaction allows for the grouping of one or more changes to -tables and rows in the database to form an atomic or indivisible -operation. That is, either all of the changes occur or none of them -do. If for any reason the transaction cannot be completed, everything -this transaction changed can be restored to the state it was in prior to -the start of the transaction via a rollback operation. - -@item -@strong{Consistency} -@tab -Transactions always operate on a consistent view of the data and when -they end always leave the data in a consistent state. Data may be said to -be consistent as long as it conforms to a set of invariants, such as no -two rows in the customer table have the same customer ID and all orders -have an associated customer row. While a transaction executes, these -invariants may be violated, but no other transaction will be allowed to -see these inconsistencies, and all such inconsistencies will have been -eliminated by the time the transaction ends. 
- -@item -@strong{Isolation} -@tab To a given transaction, it should appear as though it is running -all by itself on the database. The effects of concurrently running -transactions are invisible to this transaction, and the effects of this -transaction are invisible to others until the transaction is committed. - -@item -@strong{Durability} -@tab Once a transaction is committed, its effects are guaranteed to -persist even in the event of subsequent system failures. Until the -transaction commits, not only are any changes made by that transaction -not durable, but are guaranteed not to persist in the face of a system -failures, as crash recovery will rollback their effects. -@end multitable - -@cindex GEMINI tables, COMMIT/ROLLBACK -@noindent -@strong{Transaction COMMIT/ROLLBACK} - -As stated above, a transaction is a group of work being done to -data. Unless otherwise directed, @strong{MySQL} considers each statement -a transaction in itself. Multiple updates can be accomplished by placing -them in a single statement, however they are limited to a single table. - -Applications tend to require more robust use of transaction -concepts. Take, for example, a system that processes an order: A row may -be inserted in an order table, additional rows may be added to an -order-line table, updates may be made to inventory tables, etc. It is -important that if the order completes, all the changes are made to all -the tables involved; likewise if the order fails, none of the changes to -the tables must occur. To facilitate this requirement, @strong{MySQL} -has syntax to start a transaction called @code{BEGIN WORK}. All -statements that occur after the @code{BEGIN WORK} statement are grouped -into a single transaction. The end of this transaction occurs when a -@code{COMMIT} or @code{ROLLBACK} statement is encountered. 
After the -@code{COMMIT} or @code{ROLLBACK} the system returns back to the behavior -before the @code{BEGIN WORK} statement was encountered where every -statement is a transaction. - -To permanently turn off the behavior where every statement is a -transaction, @strong{MySQL} added a variable called -@code{AUTOCOMMIT}. The @code{AUTOCOMMIT} variable can have two values, -@code{1} and @code{0}. The mode where every statement is a transaction -is when @code{AUTOCOMMIT} is set to @code{1} (@code{AUTOCOMMIT=1}). When -@code{AUTOCOMMIT} is set to @code{0} (@code{AUTOCOMMIT=0}), then every -statement is part of the same transaction until the transaction end by -either @code{COMMIT} or @code{ROLLBACK}. Once a transaction completes, a -new transaction is immediately started and the process repeats. - -Here is an example of the SQL statements that you may find in a typical -order: - -@example -BEGIN WORK; - INSERT INTO order VALUES ...; - INSERT INTO order-lines VALUES ...; - INSERT INTO order-lines VALUES ...; - INSERT INTO order-lines VALUES ...; - UPDATE inventory WHERE ...; -COMMIT; -@end example - -This example shows how to use the @code{BEGIN WORK} statement to start a -transaction. If the variable @code{AUTOCOMMIT} is set to @code{0}, then -a transaction would have been started already. In this case, the -@code{BEGIN WORK} commits the current transaction and starts a new one. - -@cindex GEMINI tables, statement atomicity -@noindent -@strong{Statement Atomicity} - -As mentioned above, when running with @code{AUTOCOMMIT} set to @code{1}, -each statement executes as a single transaction. When a statement has an -error, then all changes make by the statement must be -undone. Transactions support this behavior. Non-transaction safe table -handlers would have a partial statement update where some of the changes -from the statement would be contained in the database and other changes -from the statement would not. 
Work would need to be done to manually -recover from the error. - -@cindex GEMINI tables, recovery -@noindent -@strong{Recovery} - -Transactions are the basis for database recovery. Recovery is what -supports the Durability attribute of the ACID transaction. - -@code{GEMINI} uses a separate file called the Recovery Log located in -the @code{$DATADIR} directory named @code{gemini.rl}. This file -maintains the integrity of all the @code{GEMINI} tables. @code{GEMINI} -can not recover any data from non-@code{GEMINI} tables. In addition, the -@code{gemini.rl} file is used to rollback transactions in support of the -@code{ROLLBACK} statement. - -In the event of a system failure, the next time the @strong{MySQL} -server is started, @code{GEMINI} will automatically go through its -crash recovery process. The result of crash recovery is that all the -@code{GEMINI} tables will contain the latest changes made to them, and -all transactions that were open at the time of the crash will have been -rolled back. - -The @code{GEMINI} Recovery Log reuses space when it can. Space can be -reused when information in the Recovery Log is no longer needed for -crash recovery or rollback. - -@cindex GEMINI tables, isolation levels -@noindent -@strong{Isolation Levels} - -There are four isolation levels supported by @code{GEMINI}: - -@itemize @bullet -@item -READ UNCOMMITTED -@item -READ COMMITTED -@item -REPEATABLE READ -@item -SERIALIZABLE -@end itemize - -These isolation levels apply only to shared locks obtained by select -statements, excluding select for update. Statements that get exclusive -locks always retain those locks until the transaction commits or rolls -back. - -By default, @code{GEMINI} operates at the @code{READ COMMITTED} -level. 
You can override the default using the following command: - -@example -SET [GLOBAL | SESSION] TRANSACTION ISOLATION LEVEL [READ UNCOMMITTED | -READ COMMITTED | REPEATABLE READ | SERIALIZABLE ] -@end example - -If the @code{SESSION} qualifier used, the specified isolation level -persists for the entire session. If the @code{GLOBAL} qualifier is used, -the specified isolation level is applied to all new connections from -this point forward. Note that the specified isolation level will not -change the behavior for existing connections including the connection -that exectues the @code{SET GLOBAL TRANSACTION ISOLATION LEVEL} -statement. - -@multitable @columnfractions .30 .70 -@item @sc{Isolation Level} @tab @sc{Description} - -@item -@strong{READ UNCOMMITTED} -@tab Does not obtain any locks when reading rows. This means that if a -row is locked by another process in a transaction that has a more strict -isolation level, the @code{READ UNCOMMITTED} query will not wait until -the locks are released before reading the row. You will get an error if -attempt any updates while running at this isolation level. - -@item -@strong{READ COMMITTED} -@tab Locks the requested rows long enough to copy the row from the -database block to the client row buffer. If a @code{READ COMMITTED} -query finds that a row is locked exclusively by another process, it will -wait until either the row has been released, or the lock timeout value -has expired. - -@item -@strong{REPEATABLE READ} -@tab Locks all the rows needed to satisfy the query. These locks are -held until the transaction ends (commits or rolls back). If a -@code{REPEATABLE READ} query finds that a row is locked exclusively by -another process, it will wait until either the row has been released, or -the lock timeout value has expired. - -@item -@strong{SERIALIZABLE} -@tab Locks the table that contains the rows needed to satisfy the -query. This lock is held until the transaction ends (commits or rolls -back). 
If a @code{SERIALIZABLE} query finds that a row is exclusively -locked by another process, it will wait until either the row has been -released, or the lock timeout value has expired. -@end multitable - -The statements that get exclusive locks are @code{INSERT}, -@code{UPDATE}, @code{DELETE} and @code{SELECT ... FOR UPDATE}. Select -statements without the @code{FOR UPDATE} qualifier get shared locks -which allow other not ''for update'' select statements to read the same -rows but block anyone trying to update the row from accessing it. Rows -or tables with exclusive locks block all access to the row from other -transactions until the transaction ends. - -In general terms, the higher the Isolation level the more likelihood of -having concurrent locks and therefore lock conflicts. In such cases, -adjust the @code{-O gemini_lock_table_size} accordingly. - -@cindex GEMINI tables, row-level locking -@noindent -@strong{Row-Level Locking} - -@code{GEMINI} uses row locks, which allows high concurrency for requests -on the same table. - -In order to avoid lock table overflow, SQL statements that require -applying locks to a large number of rows should either be run at the -serializable isolation level or should be covered by a lock table -statement. - -Memory must be pre-allocated for the lock table. The mysqld server -startup option @code{-0 gemini_lock_table_size} can be used to adjust -the number of concurrent locks. - -@cindex GEMINI tables, limitations -@node GEMINI Limitations, , GEMINI Concepts, GEMINI Overview -@subsubsection GEMINI Limitations - -The following limitations are in effect for the current version of -@code{GEMINI}: - -@itemize @bullet -@item -@code{DROP DATABASE} does not work with @code{GEMINI} tables; instead, -drop all the tables in the database first, then drop the database. - -@item -Maximum number of @code{GEMINI} tables is 1012. - -@item -Maximum number of @code{GEMINI} files a server can manage is 1012. 
Each -table consumes one file; an additional file is consumed if the table has -any indexes defined on it. - -@item -Maximum size of BLOBs is 16MB. - -@item -@code{FULLTEXT} indexes are not supported with @code{GEMINI} tables. - -@item -There is no support for multi-component @code{AUTO_INCREMENT} fields -that provide alternating values at the component level. If you try to -create such a field, @code{GEMINI} will refuse. - -@item -@code{TEMPORARY TABLES} are not supported by @code{GEMINI}. The -statement @code{CREATE TEMPORARY TABLE ... TYPE=GEMINI} will generate -the response: @code{ERROR 1005: Can't create table '/tmp/#sqlxxxxx' -(errno: 0)}. - -@item -@code{FLUSH TABLES} has not been implemented with @code{GEMINI} tables. -@end itemize - -@cindex GEMINI tables, using -@node Using GEMINI Tables, , GEMINI Overview, GEMINI -@subsection Using GEMINI Tables - -This section explains the various startup options you can use with -@code{GEMINI} tables, how to backup @code{GEMINI} tables, some -performance considerations and sample configurations, and a brief -discussion of when to use @code{GEMINI} tables. 
- -Specifically, the topics covered in this section are: - -@itemize @bullet -@item -Startup Options -@item -Creating @code{GEMINI} Tables -@item -Backing Up @code{GEMINI} Tables -@item -Using Auto_Increment Columns With @code{GEMINI} Tables -@item -Performance Considerations -@item -Sample Configurations -@item -When To Use @code{GEMINI} Tables -@end itemize - -@cindex GEMINI tables, startup options -@menu -* Startup Options:: -* Creating GEMINI Tables:: -* Backing Up GEMINI Tables:: -* Restoring GEMINI Tables:: -* Using Auto_Increment Columns With GEMINI Tables:: -* Performance Considerations:: -* Sample Configurations:: -* When To Use GEMINI Tables:: -@end menu - -@node Startup Options, Creating GEMINI Tables, Using GEMINI Tables, Using GEMINI Tables -@subsubsection Startup Options - -The table below lists options to mysqld that can be used to change the -behavior of @code{GEMINI} tables. - -@multitable @columnfractions .40 .60 -@item @sc{Option} @tab @sc{Description} - -@item -@code{--default-table-type=gemini} -@tab Sets the default table handler to be @code{GEMINI}. All create -table statements will create @code{GEMINI} tables unless otherwise -specified with @code{TYPE=@var{table-type}}. As noted above, there is -currently a limitation with @code{TEMPORARY} tables using @code{GEMINI}. - -@item -@code{--gemini-flush-log-at-commit} -@tab Forces the recovery log buffers to be flushed after every -commit. This can have a serious performance penalty, so use with -caution. - -@item -@code{--gemini-recovery=FULL | NONE | FORCE} -@tab Sets the recovery mode. Default is @code{FULL}. @code{NONE} is -useful for performing repeatable batch operations because the updates -are not recorded in the recovery log. @code{FORCE} skips crash recovery -upon startup; this corrupts the database, and should be used in -emergencies only. - -@item -@code{--gemini-unbuffered-io} -@tab All database writes bypass the OS cache. 
This can provide a -performance boost on heavily updated systems where most of the dataset -being worked on is cached in memory with the @code{gemini_buffer_cache} -parameter. - -@item -@code{--O gemini_buffer_cache=size} -@tab Amount of memory to allocate for database buffers, including Index -and Record information. It is recommended that this number be 10% of the -total size of all @code{GEMINI} tables. Do not exceed amount of memory -on the system! - -@item -@code{--O gemini_connection_limit=#} -@tab Maximum number of connections to @code{GEMINI}; default is -@code{100}. Each connection consumes about 1K of memory. - -@item -@code{--O gemini_io_threads=#} -@tab Number of background I/O threads; default is @code{2}. Increase the -number when using @code{--gemini-unbuffered-io} - -@item -@code{--O gemini_lock_table_size=#} -@tab Sets the maximum number of concurrent locks; default is 4096. Using -@code{SET [ GLOBAL | SESSION ] TRANSACTION ISOLATION = ...} will -determine how long a program will hold row locks. - -@item -@code{--O gemini_lock_wait_timeout=seconds} -@tab Number of seconds to wait for record locks when performing queries; -default is 10 seconds. Using @code{SET [ GLOBAL | SESSION ] TRANSACTION -ISOLATION = ...} will determine how long a program will hold row locks. - -@item -@code{--skip-gemini} -@tab Do not use @code{GEMINI}. If you use @code{--skip-gemini}, @strong{MySQL} -will not initialize the @code{GEMINI} table handler, saving memory; you -cannot use @code{GEMINI} tables if you use @code{--skip-gemini}. - -@item -@code{--transaction-isolation=READ-UNCOMMITTED | READ-COMMITTED | REPEATABLE-READ | SERIALIZABLE} -@tab Sets the GLOBAL transaction isolation level for all users that -connect to the server; can be overridden with the SET ISOLATION LEVEL -statement. 
-@end multitable - -@cindex GEMINI tables, creating -@node Creating GEMINI Tables, Backing Up GEMINI Tables, Startup Options, Using GEMINI Tables -@subsubsection Creating GEMINI Tables - -@code{GEMINI} tables can be created by either using the @code{CREATE -TABLE} syntax or the @code{ALTER TABLE} syntax. - -@itemize @bullet -@item -The syntax for creating a @code{GEMINI} table is: - -@example -CREATE TABLE @var{table-name} (....) TYPE=GEMINI; -@end example - -@item -The syntax to convert a table to @code{GEMINI} is: - -@example -ALTER TABLE @var{table-name} TYPE=GEMINI; -@end example -@end itemize - -@xref{Tutorial}, for more information on how to create and use -@code{MySQL} tables. - -@cindex GEMINI tables, backing up -@node Backing Up GEMINI Tables, Restoring GEMINI Tables, Creating GEMINI Tables, Using GEMINI Tables -@subsubsection Backing Up GEMINI Tables - -@code{GEMINI} supports both @code{BACKUP TABLE} and @code{RESTORE TABLE} -syntax. To learn more about how to use @code{BACKUP} and @code{RESTORE}, -see @ref{BACKUP TABLE} and @ref{RESTORE TABLE}. - -To backup @code{GEMINI} tables outside of the @code{MySQL} environment, -you must first shut down the @code{MySQL} server. Once the server is -shut down, you can copy the files associated with @code{GEMINI} to a -different location. The files that make up the @code{GEMINI} table -handler are: - -@itemize @bullet -@item -All files associated with a table with a @code{.gmd} extention below the -@code{$DATADIR} directory. Such files include @code{@var{table}.gmd}, -@code{@var{table}.gmi}, and @code{@var{table}.frm} -@item -@code{gemini.db} in the @code{$DATADIR} directory -@item -@code{gemini.rl} in the @code{$DATADIR} directory -@item -@code{gemini.lg} in the @code{$DATADIR} directory -@end itemize - -All the @code{GEMINI} files must be copied together. You can not copy -just the @code{.gmi} and @code{.gmd} files to a different -@code{$DATADIR} and have them become part of a new database. 
You can -copy an entire @code{$DATADIR} directory to another location and start a -@strong{MySQL} server using the new @code{$DATADIR}. - -@cindex GEMINI tables, restoring -@node Restoring GEMINI Tables, Using Auto_Increment Columns With GEMINI Tables, Backing Up GEMINI Tables, Using GEMINI Tables -@subsubsection Restoring GEMINI Tables - -To restore @code{GEMINI} tables outside of the @code{MySQL} environment, -you must first shut down the @code{MySQL} server. Once the server is -shut down, you can remove all @code{GEMINI} files in the target -@code{$DATADIR} and then copy the files previously backed up into the -@code{$DATADIR} directory. - -As mentioned above, the files that make up the @code{GEMINI} table -handler are: - -@itemize @bullet -@item -All files associated with a table with a @code{.gmd} extention below the -@code{$DATADIR} directory. Such files include @code{@var{table}.gmd}, -@code{@var{table}.gmi}, and @code{@var{table}.frm} -@item -@code{gemini.db} in the @code{$DATADIR} directory -@item -@code{gemini.rl} in the @code{$DATADIR} directory -@item -@code{gemini.lg} in the @code{$DATADIR} directory -@end itemize - -When restoring a table, all the @code{GEMINI} files must be copied -together. You can not restore just the @code{.gmi} and @code{.gmd} -files. - -@cindex GEMINI tables, auto_increment -@node Using Auto_Increment Columns With GEMINI Tables, Performance Considerations, Restoring GEMINI Tables, Using GEMINI Tables -@subsubsection Using Auto_Increment Columns With GEMINI Tables - -As mentioned previously, @code{GEMINI} tables support row-level and -table-level locking to increase concurrency in applications and to allow -reading of tables without locking for maximum concurrency in heavy -update environments. This feature has several implications when working -with @code{auto_increment} tables. 
- -In @code{MySQL}, when a column is defined as an @code{auto_increment} -column, and a row is inserted into the table with a @code{NULL} for the -column, the @code{auto_increment} column is updated to be 1 higher than -the highest value in the column. - -With @code{MyISAM} tables, the @code{auto_increment} function is -implemented by looking in the index and finding the highest value and -adding 1 to it. This is possible because the entire @code{ISAM} table is -locked during the update period and the increment value is therefore -guaranteed to not be changing. - -With @code{GEMINI} tables, the @code{auto_increment} function is -implemented by maintaining a counter in a separate location from the -table data. Instead of looking at the highest value in the table index, -@code{GEMINI} tables look at this separately maintained counter. This -means that in a transactional model, unlike the bottleneck inherent in -the @code{MyISAM} approach, @code{GEMINI} users do @b{not} have to wait -until the transaction that added the last value either commits or -rollbacks before looking at the value. - -Two side-effects of the @code{GEMINI} implementation are: - -@itemize @bullet -@item -If an insert is done where the column with the @code{auto_increment} is -specified, and this specified value is the highest value, @code{MyISAM} -uses it as its @code{auto_increment} value, and every subsequent insert -is based on this. By contrast, @code{GEMINI} does not use this value, -but instead uses the value maintained in the separate @code{GEMINI} -counter location. - -@item -To set the counter to a specific value, you can use @code{SET -insert_id=#} and insert a new row in the table. However, as a general -rule, values should not be inserted into an @code{auto_increment} -column; the database manager should be maintaining this field, not the -application. @code{SET insert_id} is a recovery mechanism that should be -used in case of error only. 
-@end itemize - -Note that if you delete the row containing the maximum value for an -@code{auto_increment} column, the value will be reused with a -@code{GEMINI} table but not with a @code{MyISAM} table. - -See @ref{CREATE TABLE} for more information about creating -@code{auto_increment} columns. - -@cindex GEMINI tables, peformance considerations -@node Performance Considerations, Sample Configurations, Using Auto_Increment Columns With GEMINI Tables, Using GEMINI Tables -@subsubsection Performance Considerations - -In addition to designing the best possible application, configuration of -the data and the server startup parameters need to be considered. How -the hardware is being used can have a dramatic affect on how fast the -system will respond to queries. Disk Drives and Memory must both be -considered. - -@noindent -@strong{Disk Drives} - -For best performance, you want to spread the data out over as many disks -as possible. Using RAID 10 stripes work very well. If there are a lot of -updates then the recovery log (@code{gemini.rl}) should be on a -relatively quiet disk drive. - -To spread the data out without using RAID 10, you can do the following: - -@itemize @bullet -@item -Group all the tables into three categories: Heavy Use, Moderate Use, -Light Use. - -@item -Take the number of disk drives available and use a round-robin approach -to the three categories grouping the tables on a disk drive. The result -will be an equal distribution of Heavy/Moderate/Light tables assigned to -each disk drive. - -@item -Once the tables have been converted to @code{GEMINI} by using the -@code{ALTER TABLE TYPE=GEMINI} statements, move (@code{mv}) the -@code{.gmd} and @code{.gmi} files to a different disk drive and link -(@code{ln -s}) them back to the original directory where the @code{.frm} -file resides. - -@item -Finally, move the @code{gemini.rl} file to its quiet disk location and link -the file back to the @code{$DATADIR} directory. 
-@end itemize - -@noindent -@strong{Memory} - -The more data that can be placed in memory the faster the access to the -data. Figure out how large the @code{GEMINI} data is by adding up the -@code{.gmd} and @code{.gmi} file sizes. If you can, put at least 10% of -the data into memory. You allocate memory for the rows and indexes by -using the @code{gemini_buffer_cache} startup parameter. For example: - -@example -mysqld -O gemini_buffer_cache=800M -@end example - -@noindent -would allocate 800 MB of memory for the @code{GEMINI} buffer cache. - -@cindex GEMINI tables, sample configurations -@node Sample Configurations, When To Use GEMINI Tables, Performance Considerations, Using GEMINI Tables -@subsubsection Sample Configurations - -Based on the performance considerations above, we can look at some -examples for how to get the best performance out of the system when -using @code{GEMINI} tables. - -@multitable @columnfractions .30 .70 -@item @sc{Hardware} @tab @sc{Configuration} -@item -One CPU, 128MB memory, one disk drive -@tab Allocate 80MB of memory for reading and updating @code{GEMINI} -tables by starting the mysqld server with the following option: - -@example --O gemini_buffer_cache=80M -@end example - -@item -Two CPUs, 512MB memory, four disk drives -@tab Use RAID 10 to stripe the data across all available disks, or use -the method described in the performance considerations section, -above. Allocate 450MB of memory for reading/updating @code{GEMINI} -tables: - -@example --O gemini_buffer_cache=450M -@end example -@end multitable - -@cindex GEMINI tables, when to use -@node When To Use GEMINI Tables, , Sample Configurations, Using GEMINI Tables -@subsubsection When To Use GEMINI Tables - -Because the @code{GEMINI} table handler provides crash recovery and -transaction support, there is extra overhead that is not found in other -non-transaction safe table handlers. 
Here are some general guidelines -for when to employ @code{GEMINI} and when to use other non-transaction -safe tables (@code{NTST}). - -@multitable @columnfractions .30 .25 .45 -@item -@sc{Access Trends} @tab @sc{Table Type} @tab @sc{Reason} -@item -Read-only -@tab @code{NTST} -@tab Less overhead and faster -@item -Critical data -@tab @code{GEMINI} -@tab Crash recovery protection -@item -High concurrency -@tab @code{GEMINI} -@tab Row-level locking -@item -Heavy update -@tab @code{GEMINI} -@tab Row-level locking -@end multitable - -The table below shows how a typical application schema could be defined. - -@multitable @columnfractions .15 .30 .25 .30 -@item -@sc{Table} @tab @sc{Contents} @tab @sc{Table Type} @tab @sc{Reason} -@item -account -@tab Customer account data -@tab @code{GEMINI} -@tab Critical data, heavy update -@item -order -@tab Orders for a customer -@tab @code{GEMINI} -@tab Critical data, heavy update -@item -orderline -@tab Orderline detail for an order -@tab @code{GEMINI} -@tab Critical data, heavy update -@item -invdesc -@tab Inventory description -@tab @code{NTST} -@tab Read-only, frequent access -@item -salesrep -@tab Sales rep information -@tab @code{NTST} -@tab Infrequent update -@item -inventory -@tab Inventory information -@tab @code{GEMINI} -@tab High concurrency, critical data -@item -config -@tab System configuration -@tab @code{NTST} -@tab Read-only -@end multitable - @node InnoDB, , GEMINI, Table types @section InnoDB Tables @@ -26891,6 +25783,1142 @@ P.O.Box 800 Finland @end example +@cindex tables, @code{BDB} +@cindex tables, @code{Berkeley DB} +@node BDB, GEMINI, HEAP, Table types +@section BDB or Berkeley_DB Tables + +@menu +* BDB overview:: Overview of BDB Tables +* BDB install:: Installing BDB +* BDB start:: BDB startup options +* BDB characteristic:: Some characteristic of @code{BDB} tables: +* BDB TODO:: Some things we need to fix for BDB in the near future: +* BDB portability:: Operating systems supported by @strong{BDB} +* BDB 
errors:: Errors You May Get When Using BDB Tables +@end menu + +@node BDB overview, BDB install, BDB, BDB +@subsection Overview of BDB Tables + +Support for BDB tables is included in the @strong{MySQL} source distribution +starting from Version 3.23.34 and is activated in the @strong{MySQL}-Max +binary. + +BerkeleyDB, available at @uref{http://www.sleepycat.com/}, has provided +@strong{MySQL} with a transactional table handler. By using BerkeleyDB +tables, your tables may have a greater chance of surviving crashes, and they also +provide @code{COMMIT} and @code{ROLLBACK} on transactions. The +@strong{MySQL} source distribution comes with a BDB distribution that has a +couple of small patches to make it work more smoothly with @strong{MySQL}. +You can't use a non-patched @code{BDB} version with @strong{MySQL}. + +We at @strong{MySQL AB} are working in close cooperation with Sleepycat to +keep the quality of the @strong{MySQL}/BDB interface high. + +When it comes to supporting BDB tables, we are committed to helping our +users locate the problem and create a reproducible test case +for any problems involving BDB tables. Any such test case will be +forwarded to Sleepycat who in turn will help us find and fix the +problem. As this is a two-stage operation, any problems with BDB tables +may take a little longer for us to fix than for other table handlers. +However, as the BerkeleyDB code itself has been used by many other +applications than @strong{MySQL}, we don't envision any big problems with +this. @xref{Table handler support}. + +@node BDB install, BDB start, BDB overview, BDB +@subsection Installing BDB + +If you have downloaded a binary version of @strong{MySQL} that includes +support for BerkeleyDB, simply follow the instructions for installing a +binary version of @strong{MySQL}. +@xref{Installing binary}. @xref{mysqld-max, , @code{mysqld-max}}.
+ +To compile @strong{MySQL} with Berkeley DB support, download @strong{MySQL} +Version 3.23.34 or newer and configure @code{MySQL} with the +@code{--with-berkeley-db} option. @xref{Installing source}. + +@example +cd /path/to/source/of/mysql-3.23.34 +./configure --with-berkeley-db +@end example + +Please refer to the manual provided with the @code{BDB} distribution for +more updated information. + +Even though Berkeley DB is in itself very tested and reliable, +the @strong{MySQL} interface is still considered beta quality. +We are actively improving and optimizing it to get it stable very +soon. + +@node BDB start, BDB characteristic, BDB install, BDB +@subsection BDB startup options + +If you are running with @code{AUTOCOMMIT=0} then your changes in @code{BDB} +tables will not be updated until you execute @code{COMMIT}. Instead of commit +you can execute @code{ROLLBACK} to forget your changes. @xref{COMMIT}. + +If you are running with @code{AUTOCOMMIT=1} (the default), your changes +will be committed immediately. You can start an extended transaction with +the @code{BEGIN WORK} SQL command, after which your changes will not be +committed until you execute @code{COMMIT} (or decide to @code{ROLLBACK} +the changes). + +The following options to @code{mysqld} can be used to change the behavior of +BDB tables: + +@multitable @columnfractions .30 .70 +@item @strong{Option} @tab @strong{Meaning} +@item @code{--bdb-home=directory} @tab Base directory for BDB tables. This should be the same directory you use for --datadir. +@item @code{--bdb-lock-detect=#} @tab Berkeley lock detect. One of (DEFAULT, OLDEST, RANDOM, or YOUNGEST). +@item @code{--bdb-logdir=directory} @tab Berkeley DB log file directory. +@item @code{--bdb-no-sync} @tab Don't synchronously flush logs. +@item @code{--bdb-no-recover} @tab Don't start Berkeley DB in recover mode. 
+@item @code{--bdb-shared-data} @tab Start Berkeley DB in multi-process mode (Don't use @code{DB_PRIVATE} when initializing Berkeley DB) +@item @code{--bdb-tmpdir=directory} @tab Berkeley DB tempfile name. +@item @code{--skip-bdb} @tab Don't use Berkeley DB. +@item @code{-O bdb_max_lock=1000} @tab Set the maximum number of locks possible. @xref{SHOW VARIABLES}. +@end multitable + +If you use @code{--skip-bdb}, @strong{MySQL} will not initialize the +Berkeley DB library and this will save a lot of memory. Of course, +you cannot use @code{BDB} tables if you are using this option. + +Normally you should start @code{mysqld} without @code{--bdb-no-recover} if you +intend to use BDB tables. This may, however, give you problems when you +try to start @code{mysqld} if the BDB log files are corrupted. @xref{Starting +server}. + +With @code{bdb_max_lock} you can specify the maximum number of locks +(10000 by default) you can have active on a BDB table. You should +increase this if you get errors of type @code{bdb: Lock table is out of +available locks} or @code{Got error 12 from ...} when you do long +transactions or when @code{mysqld} has to examine a lot of rows to +calculate the query. + +You may also want to change @code{binlog_cache_size} and +@code{max_binlog_cache_size} if you are using big multi-line transactions. +@xref{COMMIT}. + +@node BDB characteristic, BDB TODO, BDB start, BDB +@subsection Some characteristic of @code{BDB} tables: + +@itemize @bullet +@item +To be able to roll back transactions, BDB maintains log files. For maximum +performance you should place these on another disk than your databases +by using the @code{--bdb-logdir} option. +@item +@strong{MySQL} performs a checkpoint each time a new BDB log +file is started, and removes any log files that are not needed for +current transactions. One can also run @code{FLUSH LOGS} at any time +to checkpoint the Berkeley DB tables.
+ +For disaster recovery, one should use table backups plus +@strong{MySQL}'s binary log. @xref{Backup}. + +@strong{Warning}: If you delete old log files that are in use, BDB will +not be able to do recovery at all and you may lose data if something +goes wrong. +@item +@strong{MySQL} requires a @code{PRIMARY KEY} in each BDB table to be +able to refer to previously read rows. If you don't create one, +@strong{MySQL} will create and maintain a hidden @code{PRIMARY KEY} for +you. The hidden key has a length of 5 bytes and is incremented for each +insert attempt. +@item +If all columns you access in a @code{BDB} table are part of the same index or +part of the primary key, then @strong{MySQL} can execute the query +without having to access the actual row. In a @code{MyISAM} table the +above holds only if the columns are part of the same index. +@item +The @code{PRIMARY KEY} will be faster than any other key, as the +@code{PRIMARY KEY} is stored together with the row data. As the other keys are +stored as the key data + the @code{PRIMARY KEY}, it's important to keep the +@code{PRIMARY KEY} as short as possible to save disk and get better speed. +@item +@code{LOCK TABLES} works on @code{BDB} tables as with other tables. If +you don't use @code{LOCK TABLES}, @strong{MySQL} will issue an internal +multiple-write lock on the table to ensure that the table will be +properly locked if another thread issues a table lock. +@item +Internal locking in @code{BDB} tables is done on page level. +@item +@code{SELECT COUNT(*) FROM table_name} is slow as @code{BDB} tables don't +maintain a count of the number of rows in the table. +@item +Scanning is slower than with @code{MyISAM} tables as one has data in BDB +tables stored in B-trees and not in a separate data file. +@item +The application must always be prepared to handle cases where +any change of a @code{BDB} table may cause an automatic rollback and any +read may fail with a deadlock error.
+@item +Keys are not compressed to previous keys as with ISAM or MyISAM +tables. In other words, the key information will take a little more +space in @code{BDB} tables compared to MyISAM tables which don't use +@code{PACK_KEYS=0}. +@item +There are often holes in the BDB table to allow you to insert new rows in +the middle of the key tree. This makes BDB tables somewhat larger than +MyISAM tables. +@item +The optimizer needs to know an approximation of the number of rows in +the table. @strong{MySQL} solves this by counting inserts and +maintaining this in a separate segment in each BDB table. If you don't +do a lot of @code{DELETE} or @code{ROLLBACK} statements, this number should be +accurate enough for the @strong{MySQL} optimizer, but as @strong{MySQL} +only stores the number on close, it may be wrong if @strong{MySQL} dies +unexpectedly. It should not be fatal even if this number is not 100% +correct. One can update the number of rows by executing @code{ANALYZE +TABLE} or @code{OPTIMIZE TABLE}. @xref{ANALYZE TABLE}. @xref{OPTIMIZE +TABLE}. +@item +If you get a full disk with a @code{BDB} table, you will get an error +(probably error 28) and the transaction should roll back. This is in +contrast with @code{MyISAM} and @code{ISAM} tables where @code{mysqld} will +wait for enough free disk before continuing. +@end itemize + +@node BDB TODO, BDB portability, BDB characteristic, BDB +@subsection Some things we need to fix for BDB in the near future: + +@itemize @bullet +@item +It's very slow to open many BDB tables at the same time. If you are +going to use BDB tables, you should not have a very big table cache (> +256 ?) and you should use @code{--no-auto-rehash} with the @code{mysql} +client. We plan to partly fix this in 4.0. +@item +@code{SHOW TABLE STATUS} doesn't yet provide that much information for BDB +tables. +@item +Optimize performance. +@item +Change to not use page locks at all when we are scanning tables.
+@end itemize + +@node BDB portability, BDB errors, BDB TODO, BDB +@subsection Operating systems supported by @strong{BDB} + +If, after having built @strong{MySQL} with support for BDB tables, you get +the following error in the log file when you start @code{mysqld}: + +@example +bdb: architecture lacks fast mutexes: applications cannot be threaded +Can't init databases +@end example + +This means that @code{BDB} tables are not supported for your architecture. +In this case you have to rebuild @strong{MySQL} without BDB table support. + +NOTE: The following list is not complete; we will update it as we get +more information. + +Currently we know that BDB tables work with the following operating +systems: + +@itemize @bullet +@item +Linux 2.x Intel +@item +Solaris SPARC +@item +SCO OpenServer +@item +SCO UnixWare 7.0.1 +@end itemize + +They don't work with the following operating systems: + +@itemize @bullet +@item +Linux 2.x Alpha +@item +Mac OS X +@end itemize + +@node BDB errors, , BDB portability, BDB +@subsection Errors You May Get When Using BDB Tables + +@itemize @bullet +@item +If you get the following error in the @code{hostname.err} log when +starting @code{mysqld}: + +@example +bdb: Ignoring log file: .../log.XXXXXXXXXX: unsupported log version # +@end example +it means that the new @code{BDB} version doesn't support the old log +file format. In this case you have to delete all @code{BDB} logs +from your database directory (the files that have the format +@code{log.XXXXXXXXXX}) and restart @code{mysqld}. We would also +recommend that you do a @code{mysqldump --opt} of your old @code{BDB} +tables, delete the old tables and restore the dump.
+@item +If you are not running in @code{auto_commit} mode and delete a table that +is in use by another thread, you may get the following error messages in +the @strong{MySQL} error file: + +@example +001119 23:43:56 bdb: Missing log fileid entry +001119 23:43:56 bdb: txn_abort: Log undo failed for LSN: 1 3644744: Invalid +@end example + +This is not fatal but we don't recommend that you delete tables if you are +not in @code{auto_commit} mode, until this problem is fixed (the fix is +not trivial). +@end itemize + +@cindex GEMINI tables +@node GEMINI, InnoDB, BDB, Table types +@section GEMINI Tables + +@cindex GEMINI tables, overview +@menu +* GEMINI Overview:: +* Using GEMINI Tables:: +@end menu + +@node GEMINI Overview, Using GEMINI Tables, GEMINI, GEMINI +@subsection GEMINI Overview + +GEMINI is currently not included in the @strong{MySQL} 3.23 distribution +because, to our knowledge, it is not an open source (GPL) product. + +@code{GEMINI} is a transaction-safe table handler for @strong{MySQL}. It +provides row-level locking, robust transaction support and reliable +crash recovery. It is targeted for databases that need to handle heavy +multi-user updates typical of transaction processing applications while +still providing excellent performance for read-intensive operations. The +@code{GEMINI} table type is developed and supported by NuSphere +Corporation (see @url{http://www.nusphere.com}). + +@code{GEMINI} provides full ACID transaction properties (Atomic, +Consistent, Isolated, and Durable) with a programming model that +includes support for statement atomicity and all four standard isolation +levels (Read Uncommitted, Read Committed, Repeatable Read, and +Serializable) defined in the SQL standard. + +The @code{GEMINI} tables support row-level and table-level locking to +increase concurrency in applications and allow reading of tables without +locking for maximum concurrency in a heavy update environment.
The +transaction, locking, and recovery mechanisms are tightly integrated to +eliminate unnecessary administration overhead. + +In general, if @code{GEMINI} tables are selected for an application, it +is recommended that all tables updated in the application be +@code{GEMINI} tables to provide well-defined system behavior. If +non-@code{GEMINI} tables are mixed into the application then, ACID +transaction properties cannot be maintained. While there are clearly +cases where mixing table types is appropriate, it should always be done +with careful consideration of the impact on transaction consistency and +recoverability needs of the application and underlying database. + +The @code{GEMINI} table type is derived from a successful commercial +database and uses the storage kernel technology tightly integrated with +@strong{MySQL} server. The basic @code{GEMINI} technology is in use by +millions of users worldwide in production environments today. This +maturity allows @code{GEMINI} tables to provide a solution for those +users who require transaction-based behavior as part of their +applications. + +The @code{GEMINI} table handler supports a configurable data cache that +allows a significant portion of any database to be maintained in memory +while still allowing durable updates. + +@cindex GEMINI tables, features +@menu +* GEMINI Features:: +* GEMINI Concepts:: +* GEMINI Limitations:: +@end menu + +@node GEMINI Features, GEMINI Concepts, GEMINI Overview, GEMINI Overview +@subsubsection GEMINI Features + +The following summarizes the major features provided by @code{GEMINI} +tables. + +@itemize @bullet +@item +Supports all optimization statistics used by the @strong{MySQL} optimizer +including table cardinality, index range estimates and multi-component +selectivity to insure optimal query performance. + +@item +Maintains exact cardinality information for each table so @code{SELECT +COUNT(*) FROM} table-name always returns an answer immediately. 
+ +@item +Supports index-only queries; when index data is sufficient to resolve a +query no record data is read (for non character types). + +@item +@code{GEMINI} uses block based I/O for better performance. There is no +performance penalty for using @code{VARCHAR} fields. The maximum record size is +currently 32K. + +@item +The number of rows in a single @code{GEMINI} table can be 4 quintillion +(full use of 64 bits). + +@item +Individual tables can be as large as 16 petabytes. + +@item +Locking is done at a record or row level rather than at table level +unless table locks are explicitly requested. When a row is inserted into +a table, other rows can be updated, inserted or deleted without waiting +for the inserted row to be committed. + +@item +Provides durable transactions backed by a crash recovery mechanism that +returns the database to a known consistent state in the event of an +unexpected failure. + +@item +Support for all isolation levels and statement atomicity defined in the +SQL standard. + +@item +Reliable Master Replication; the master database can survive system +failure and recover all committed transactions. +@end itemize + +@cindex GEMINI tables, concepts +@node GEMINI Concepts, GEMINI Limitations, GEMINI Features, GEMINI Overview +@subsubsection GEMINI Concepts + +This section highlights some of the important concepts behind +@code{GEMINI} and the @code{GEMINI} programming model, including: + +@itemize @bullet +@item +ACID Transactions +@item +Transaction COMMIT/ROLLBACK +@item +Statement Atomicity +@item +Recovery +@item +Isolation Levels +@item +Row-Level Locking +@end itemize + +These features are described below. + +@cindex GEMINI tables, ACID transactions +@noindent +@strong{ACID Transactions} + +ACID in the context of transactions is an acronym which stands for +@emph{Atomicity}, @emph{Consistency}, @emph{Isolation}, @emph{Durability}. 
+ +@multitable @columnfractions .25 .75 +@item @sc{Attribute} @tab @sc{Description} +@item +@strong{Atomicity} +@tab A transaction allows for the grouping of one or more changes to +tables and rows in the database to form an atomic or indivisible +operation. That is, either all of the changes occur or none of them +do. If for any reason the transaction cannot be completed, everything +this transaction changed can be restored to the state it was in prior to +the start of the transaction via a rollback operation. + +@item +@strong{Consistency} +@tab +Transactions always operate on a consistent view of the data and when +they end always leave the data in a consistent state. Data may be said to +be consistent as long as it conforms to a set of invariants, such as no +two rows in the customer table have the same customer ID and all orders +have an associated customer row. While a transaction executes, these +invariants may be violated, but no other transaction will be allowed to +see these inconsistencies, and all such inconsistencies will have been +eliminated by the time the transaction ends. + +@item +@strong{Isolation} +@tab To a given transaction, it should appear as though it is running +all by itself on the database. The effects of concurrently running +transactions are invisible to this transaction, and the effects of this +transaction are invisible to others until the transaction is committed. + +@item +@strong{Durability} +@tab Once a transaction is committed, its effects are guaranteed to +persist even in the event of subsequent system failures. Until the +transaction commits, not only are any changes made by that transaction +not durable, but they are guaranteed not to persist in the face of system +failures, as crash recovery will roll back their effects. +@end multitable + +@cindex GEMINI tables, COMMIT/ROLLBACK +@noindent +@strong{Transaction COMMIT/ROLLBACK} + +As stated above, a transaction is a group of work being done to +data.
Unless otherwise directed, @strong{MySQL} considers each statement +a transaction in itself. Multiple updates can be accomplished by placing +them in a single statement, however they are limited to a single table. + +Applications tend to require more robust use of transaction +concepts. Take, for example, a system that processes an order: A row may +be inserted in an order table, additional rows may be added to an +order-line table, updates may be made to inventory tables, etc. It is +important that if the order completes, all the changes are made to all +the tables involved; likewise if the order fails, none of the changes to +the tables must occur. To facilitate this requirement, @strong{MySQL} +has syntax to start a transaction called @code{BEGIN WORK}. All +statements that occur after the @code{BEGIN WORK} statement are grouped +into a single transaction. The end of this transaction occurs when a +@code{COMMIT} or @code{ROLLBACK} statement is encountered. After the +@code{COMMIT} or @code{ROLLBACK} the system returns back to the behavior +before the @code{BEGIN WORK} statement was encountered where every +statement is a transaction. + +To permanently turn off the behavior where every statement is a +transaction, @strong{MySQL} added a variable called +@code{AUTOCOMMIT}. The @code{AUTOCOMMIT} variable can have two values, +@code{1} and @code{0}. The mode where every statement is a transaction +is when @code{AUTOCOMMIT} is set to @code{1} (@code{AUTOCOMMIT=1}). When +@code{AUTOCOMMIT} is set to @code{0} (@code{AUTOCOMMIT=0}), then every +statement is part of the same transaction until the transaction end by +either @code{COMMIT} or @code{ROLLBACK}. Once a transaction completes, a +new transaction is immediately started and the process repeats. 
+ +Here is an example of the SQL statements that you may find in a typical +order: + +@example +BEGIN WORK; + INSERT INTO order VALUES ...; + INSERT INTO order-lines VALUES ...; + INSERT INTO order-lines VALUES ...; + INSERT INTO order-lines VALUES ...; + UPDATE inventory WHERE ...; +COMMIT; +@end example + +This example shows how to use the @code{BEGIN WORK} statement to start a +transaction. If the variable @code{AUTOCOMMIT} is set to @code{0}, then +a transaction would have been started already. In this case, the +@code{BEGIN WORK} commits the current transaction and starts a new one. + +@cindex GEMINI tables, statement atomicity +@noindent +@strong{Statement Atomicity} + +As mentioned above, when running with @code{AUTOCOMMIT} set to @code{1}, +each statement executes as a single transaction. When a statement has an +error, then all changes made by the statement must be +undone. Transactions support this behavior. Non-transaction safe table +handlers would have a partial statement update where some of the changes +from the statement would be contained in the database and other changes +from the statement would not. Work would need to be done to manually +recover from the error. + +@cindex GEMINI tables, recovery +@noindent +@strong{Recovery} + +Transactions are the basis for database recovery. Recovery is what +supports the Durability attribute of the ACID transaction. + +@code{GEMINI} uses a separate file called the Recovery Log located in +the @code{$DATADIR} directory named @code{gemini.rl}. This file +maintains the integrity of all the @code{GEMINI} tables. @code{GEMINI} +can not recover any data from non-@code{GEMINI} tables. In addition, the +@code{gemini.rl} file is used to roll back transactions in support of the +@code{ROLLBACK} statement. + +In the event of a system failure, the next time the @strong{MySQL} +server is started, @code{GEMINI} will automatically go through its +crash recovery process.
The result of crash recovery is that all the +@code{GEMINI} tables will contain the latest changes made to them, and +all transactions that were open at the time of the crash will have been +rolled back. + +The @code{GEMINI} Recovery Log reuses space when it can. Space can be +reused when information in the Recovery Log is no longer needed for +crash recovery or rollback. + +@cindex GEMINI tables, isolation levels +@noindent +@strong{Isolation Levels} + +There are four isolation levels supported by @code{GEMINI}: + +@itemize @bullet +@item +READ UNCOMMITTED +@item +READ COMMITTED +@item +REPEATABLE READ +@item +SERIALIZABLE +@end itemize + +These isolation levels apply only to shared locks obtained by select +statements, excluding select for update. Statements that get exclusive +locks always retain those locks until the transaction commits or rolls +back. + +By default, @code{GEMINI} operates at the @code{READ COMMITTED} +level. You can override the default using the following command: + +@example +SET [GLOBAL | SESSION] TRANSACTION ISOLATION LEVEL [READ UNCOMMITTED | +READ COMMITTED | REPEATABLE READ | SERIALIZABLE ] +@end example + +If the @code{SESSION} qualifier is used, the specified isolation level +persists for the entire session. If the @code{GLOBAL} qualifier is used, +the specified isolation level is applied to all new connections from +this point forward. Note that the specified isolation level will not +change the behavior for existing connections including the connection +that executes the @code{SET GLOBAL TRANSACTION ISOLATION LEVEL} +statement. + +@multitable @columnfractions .30 .70 +@item @sc{Isolation Level} @tab @sc{Description} + +@item +@strong{READ UNCOMMITTED} +@tab Does not obtain any locks when reading rows. This means that if a +row is locked by another process in a transaction that has a more strict +isolation level, the @code{READ UNCOMMITTED} query will not wait until +the locks are released before reading the row.
You will get an error if you +attempt any updates while running at this isolation level. + +@item +@strong{READ COMMITTED} +@tab Locks the requested rows long enough to copy the row from the +database block to the client row buffer. If a @code{READ COMMITTED} +query finds that a row is locked exclusively by another process, it will +wait until either the row has been released, or the lock timeout value +has expired. + +@item +@strong{REPEATABLE READ} +@tab Locks all the rows needed to satisfy the query. These locks are +held until the transaction ends (commits or rolls back). If a +@code{REPEATABLE READ} query finds that a row is locked exclusively by +another process, it will wait until either the row has been released, or +the lock timeout value has expired. + +@item +@strong{SERIALIZABLE} +@tab Locks the table that contains the rows needed to satisfy the +query. This lock is held until the transaction ends (commits or rolls +back). If a @code{SERIALIZABLE} query finds that a row is exclusively +locked by another process, it will wait until either the row has been +released, or the lock timeout value has expired. +@end multitable + +The statements that get exclusive locks are @code{INSERT}, +@code{UPDATE}, @code{DELETE} and @code{SELECT ... FOR UPDATE}. Select +statements without the @code{FOR UPDATE} qualifier get shared locks +which allow other non-``for update'' select statements to read the same +rows but block anyone trying to update the row from accessing it. Rows +or tables with exclusive locks block all access to the row from other +transactions until the transaction ends. + +In general terms, the higher the Isolation level the greater the likelihood of +having concurrent locks and therefore lock conflicts. In such cases, +adjust the @code{-O gemini_lock_table_size} accordingly. + +@cindex GEMINI tables, row-level locking +@noindent +@strong{Row-Level Locking} + +@code{GEMINI} uses row locks, which allows high concurrency for requests +on the same table.
+ +In order to avoid lock table overflow, SQL statements that require +applying locks to a large number of rows should either be run at the +serializable isolation level or should be covered by a lock table +statement. + +Memory must be pre-allocated for the lock table. The mysqld server +startup option @code{-O gemini_lock_table_size} can be used to adjust +the number of concurrent locks. + +@cindex GEMINI tables, limitations +@node GEMINI Limitations, , GEMINI Concepts, GEMINI Overview +@subsubsection GEMINI Limitations + +The following limitations are in effect for the current version of +@code{GEMINI}: + +@itemize @bullet +@item +@code{DROP DATABASE} does not work with @code{GEMINI} tables; instead, +drop all the tables in the database first, then drop the database. + +@item +Maximum number of @code{GEMINI} tables is 1012. + +@item +Maximum number of @code{GEMINI} files a server can manage is 1012. Each +table consumes one file; an additional file is consumed if the table has +any indexes defined on it. + +@item +Maximum size of BLOBs is 16MB. + +@item +@code{FULLTEXT} indexes are not supported with @code{GEMINI} tables. + +@item +There is no support for multi-component @code{AUTO_INCREMENT} fields +that provide alternating values at the component level. If you try to +create such a field, @code{GEMINI} will refuse. + +@item +@code{TEMPORARY TABLES} are not supported by @code{GEMINI}. The +statement @code{CREATE TEMPORARY TABLE ... TYPE=GEMINI} will generate +the response: @code{ERROR 1005: Can't create table '/tmp/#sqlxxxxx' +(errno: 0)}. + +@item +@code{FLUSH TABLES} has not been implemented with @code{GEMINI} tables.
+@end itemize + +@cindex GEMINI tables, using +@node Using GEMINI Tables, , GEMINI Overview, GEMINI +@subsection Using GEMINI Tables + +This section explains the various startup options you can use with +@code{GEMINI} tables, how to backup @code{GEMINI} tables, some +performance considerations and sample configurations, and a brief +discussion of when to use @code{GEMINI} tables. + +Specifically, the topics covered in this section are: + +@itemize @bullet +@item +Startup Options +@item +Creating @code{GEMINI} Tables +@item +Backing Up @code{GEMINI} Tables +@item +Using Auto_Increment Columns With @code{GEMINI} Tables +@item +Performance Considerations +@item +Sample Configurations +@item +When To Use @code{GEMINI} Tables +@end itemize + +@cindex GEMINI tables, startup options +@menu +* Startup Options:: +* Creating GEMINI Tables:: +* Backing Up GEMINI Tables:: +* Restoring GEMINI Tables:: +* Using Auto_Increment Columns With GEMINI Tables:: +* Performance Considerations:: +* Sample Configurations:: +* When To Use GEMINI Tables:: +@end menu + +@node Startup Options, Creating GEMINI Tables, Using GEMINI Tables, Using GEMINI Tables +@subsubsection Startup Options + +The table below lists options to mysqld that can be used to change the +behavior of @code{GEMINI} tables. + +@multitable @columnfractions .40 .60 +@item @sc{Option} @tab @sc{Description} + +@item +@code{--default-table-type=gemini} +@tab Sets the default table handler to be @code{GEMINI}. All create +table statements will create @code{GEMINI} tables unless otherwise +specified with @code{TYPE=@var{table-type}}. As noted above, there is +currently a limitation with @code{TEMPORARY} tables using @code{GEMINI}. + +@item +@code{--gemini-flush-log-at-commit} +@tab Forces the recovery log buffers to be flushed after every +commit. This can have a serious performance penalty, so use with +caution. + +@item +@code{--gemini-recovery=FULL | NONE | FORCE} +@tab Sets the recovery mode. Default is @code{FULL}. 
@code{NONE} is +useful for performing repeatable batch operations because the updates +are not recorded in the recovery log. @code{FORCE} skips crash recovery +upon startup; this corrupts the database, and should be used in +emergencies only. + +@item +@code{--gemini-unbuffered-io} +@tab All database writes bypass the OS cache. This can provide a +performance boost on heavily updated systems where most of the dataset +being worked on is cached in memory with the @code{gemini_buffer_cache} +parameter. + +@item +@code{-O gemini_buffer_cache=size} +@tab Amount of memory to allocate for database buffers, including Index +and Record information. It is recommended that this number be 10% of the +total size of all @code{GEMINI} tables. Do not exceed the amount of memory +on the system! + +@item +@code{-O gemini_connection_limit=#} +@tab Maximum number of connections to @code{GEMINI}; default is +@code{100}. Each connection consumes about 1K of memory. + +@item +@code{-O gemini_io_threads=#} +@tab Number of background I/O threads; default is @code{2}. Increase the +number when using @code{--gemini-unbuffered-io}. + +@item +@code{-O gemini_lock_table_size=#} +@tab Sets the maximum number of concurrent locks; default is 4096. Using +@code{SET [ GLOBAL | SESSION ] TRANSACTION ISOLATION = ...} will +determine how long a program will hold row locks. + +@item +@code{-O gemini_lock_wait_timeout=seconds} +@tab Number of seconds to wait for record locks when performing queries; +default is 10 seconds. Using @code{SET [ GLOBAL | SESSION ] TRANSACTION +ISOLATION = ...} will determine how long a program will hold row locks. + +@item +@code{--skip-gemini} +@tab Do not use @code{GEMINI}. If you use @code{--skip-gemini}, @strong{MySQL} +will not initialize the @code{GEMINI} table handler, saving memory; you +cannot use @code{GEMINI} tables if you use @code{--skip-gemini}.
+ +@item +@code{--transaction-isolation=READ-UNCOMMITTED | READ-COMMITTED | REPEATABLE-READ | SERIALIZABLE} +@tab Sets the GLOBAL transaction isolation level for all users that +connect to the server; can be overridden with the @code{SET TRANSACTION ISOLATION LEVEL} +statement. +@end multitable + +@cindex GEMINI tables, creating +@node Creating GEMINI Tables, Backing Up GEMINI Tables, Startup Options, Using GEMINI Tables +@subsubsection Creating GEMINI Tables + +@code{GEMINI} tables can be created by either using the @code{CREATE +TABLE} syntax or the @code{ALTER TABLE} syntax. + +@itemize @bullet +@item +The syntax for creating a @code{GEMINI} table is: + +@example +CREATE TABLE @var{table-name} (....) TYPE=GEMINI; +@end example + +@item +The syntax to convert a table to @code{GEMINI} is: + +@example +ALTER TABLE @var{table-name} TYPE=GEMINI; +@end example +@end itemize + +@xref{Tutorial}, for more information on how to create and use +@code{MySQL} tables. + +@cindex GEMINI tables, backing up +@node Backing Up GEMINI Tables, Restoring GEMINI Tables, Creating GEMINI Tables, Using GEMINI Tables +@subsubsection Backing Up GEMINI Tables + +@code{GEMINI} supports both @code{BACKUP TABLE} and @code{RESTORE TABLE} +syntax. To learn more about how to use @code{BACKUP} and @code{RESTORE}, +see @ref{BACKUP TABLE} and @ref{RESTORE TABLE}. + +To back up @code{GEMINI} tables outside of the @code{MySQL} environment, +you must first shut down the @code{MySQL} server. Once the server is +shut down, you can copy the files associated with @code{GEMINI} to a +different location. The files that make up the @code{GEMINI} table +handler are: + +@itemize @bullet +@item +All files associated with a table with a @code{.gmd} extension below the +@code{$DATADIR} directory.
Such files include @code{@var{table}.gmd}, +@code{@var{table}.gmi}, and @code{@var{table}.frm} +@item +@code{gemini.db} in the @code{$DATADIR} directory +@item +@code{gemini.rl} in the @code{$DATADIR} directory +@item +@code{gemini.lg} in the @code{$DATADIR} directory +@end itemize + +All the @code{GEMINI} files must be copied together. You cannot copy +just the @code{.gmi} and @code{.gmd} files to a different +@code{$DATADIR} and have them become part of a new database. You can +copy an entire @code{$DATADIR} directory to another location and start a +@strong{MySQL} server using the new @code{$DATADIR}. + +@cindex GEMINI tables, restoring +@node Restoring GEMINI Tables, Using Auto_Increment Columns With GEMINI Tables, Backing Up GEMINI Tables, Using GEMINI Tables +@subsubsection Restoring GEMINI Tables + +To restore @code{GEMINI} tables outside of the @code{MySQL} environment, +you must first shut down the @code{MySQL} server. Once the server is +shut down, you can remove all @code{GEMINI} files in the target +@code{$DATADIR} and then copy the files previously backed up into the +@code{$DATADIR} directory. + +As mentioned above, the files that make up the @code{GEMINI} table +handler are: + +@itemize @bullet +@item +All files associated with a table with a @code{.gmd} extension below the +@code{$DATADIR} directory. Such files include @code{@var{table}.gmd}, +@code{@var{table}.gmi}, and @code{@var{table}.frm} +@item +@code{gemini.db} in the @code{$DATADIR} directory +@item +@code{gemini.rl} in the @code{$DATADIR} directory +@item +@code{gemini.lg} in the @code{$DATADIR} directory +@end itemize + +When restoring a table, all the @code{GEMINI} files must be copied +together. You cannot restore just the @code{.gmi} and @code{.gmd} +files.
+ +@cindex GEMINI tables, auto_increment +@node Using Auto_Increment Columns With GEMINI Tables, Performance Considerations, Restoring GEMINI Tables, Using GEMINI Tables +@subsubsection Using Auto_Increment Columns With GEMINI Tables + +As mentioned previously, @code{GEMINI} tables support row-level and +table-level locking to increase concurrency in applications and to allow +reading of tables without locking for maximum concurrency in heavy +update environments. This feature has several implications when working +with @code{auto_increment} tables. + +In @code{MySQL}, when a column is defined as an @code{auto_increment} +column, and a row is inserted into the table with a @code{NULL} for the +column, the @code{auto_increment} column is updated to be 1 higher than +the highest value in the column. + +With @code{MyISAM} tables, the @code{auto_increment} function is +implemented by looking in the index and finding the highest value and +adding 1 to it. This is possible because the entire @code{ISAM} table is +locked during the update period and the increment value is therefore +guaranteed to not be changing. + +With @code{GEMINI} tables, the @code{auto_increment} function is +implemented by maintaining a counter in a separate location from the +table data. Instead of looking at the highest value in the table index, +@code{GEMINI} tables look at this separately maintained counter. This +means that in a transactional model, unlike the bottleneck inherent in +the @code{MyISAM} approach, @code{GEMINI} users do @b{not} have to wait +until the transaction that added the last value either commits or +rollbacks before looking at the value. + +Two side-effects of the @code{GEMINI} implementation are: + +@itemize @bullet +@item +If an insert is done where the column with the @code{auto_increment} is +specified, and this specified value is the highest value, @code{MyISAM} +uses it as its @code{auto_increment} value, and every subsequent insert +is based on this. 
By contrast, @code{GEMINI} does not use this value, +but instead uses the value maintained in the separate @code{GEMINI} +counter location. + +@item +To set the counter to a specific value, you can use @code{SET +insert_id=#} and insert a new row in the table. However, as a general +rule, values should not be inserted into an @code{auto_increment} +column; the database manager should be maintaining this field, not the +application. @code{SET insert_id} is a recovery mechanism that should be +used in case of error only. +@end itemize + +Note that if you delete the row containing the maximum value for an +@code{auto_increment} column, the value will be reused with a +@code{GEMINI} table but not with a @code{MyISAM} table. + +See @ref{CREATE TABLE} for more information about creating +@code{auto_increment} columns. + +@cindex GEMINI tables, performance considerations +@node Performance Considerations, Sample Configurations, Using Auto_Increment Columns With GEMINI Tables, Using GEMINI Tables +@subsubsection Performance Considerations + +In addition to designing the best possible application, configuration of +the data and the server startup parameters need to be considered. How +the hardware is being used can have a dramatic effect on how fast the +system will respond to queries. Disk Drives and Memory must both be +considered. + +@noindent +@strong{Disk Drives} + +For best performance, you want to spread the data out over as many disks +as possible. Using RAID 10 stripes works very well. If there are a lot of +updates then the recovery log (@code{gemini.rl}) should be on a +relatively quiet disk drive. + +To spread the data out without using RAID 10, you can do the following: + +@itemize @bullet +@item +Group all the tables into three categories: Heavy Use, Moderate Use, +Light Use. + +@item +Take the number of disk drives available and use a round-robin approach +to the three categories grouping the tables on a disk drive.
The result +will be an equal distribution of Heavy/Moderate/Light tables assigned to +each disk drive. + +@item +Once the tables have been converted to @code{GEMINI} by using the +@code{ALTER TABLE TYPE=GEMINI} statements, move (@code{mv}) the +@code{.gmd} and @code{.gmi} files to a different disk drive and link +(@code{ln -s}) them back to the original directory where the @code{.frm} +file resides. + +@item +Finally, move the @code{gemini.rl} file to its quiet disk location and link +the file back to the @code{$DATADIR} directory. +@end itemize + +@noindent +@strong{Memory} + +The more data that can be placed in memory the faster the access to the +data. Figure out how large the @code{GEMINI} data is by adding up the +@code{.gmd} and @code{.gmi} file sizes. If you can, put at least 10% of +the data into memory. You allocate memory for the rows and indexes by +using the @code{gemini_buffer_cache} startup parameter. For example: + +@example +mysqld -O gemini_buffer_cache=800M +@end example + +@noindent +would allocate 800 MB of memory for the @code{GEMINI} buffer cache. + +@cindex GEMINI tables, sample configurations +@node Sample Configurations, When To Use GEMINI Tables, Performance Considerations, Using GEMINI Tables +@subsubsection Sample Configurations + +Based on the performance considerations above, we can look at some +examples for how to get the best performance out of the system when +using @code{GEMINI} tables. + +@multitable @columnfractions .30 .70 +@item @sc{Hardware} @tab @sc{Configuration} +@item +One CPU, 128MB memory, one disk drive +@tab Allocate 80MB of memory for reading and updating @code{GEMINI} +tables by starting the mysqld server with the following option: + +@example +-O gemini_buffer_cache=80M +@end example + +@item +Two CPUs, 512MB memory, four disk drives +@tab Use RAID 10 to stripe the data across all available disks, or use +the method described in the performance considerations section, +above. 
Allocate 450MB of memory for reading/updating @code{GEMINI} +tables: + +@example +-O gemini_buffer_cache=450M +@end example +@end multitable + +@cindex GEMINI tables, when to use +@node When To Use GEMINI Tables, , Sample Configurations, Using GEMINI Tables +@subsubsection When To Use GEMINI Tables + +Because the @code{GEMINI} table handler provides crash recovery and +transaction support, there is extra overhead that is not found in other +non-transaction safe table handlers. Here are some general guidelines +for when to employ @code{GEMINI} and when to use other non-transaction +safe tables (@code{NTST}). + +Note that in the following table, you could instead of GEMINI use +InnoDB or BDB tables. + +@multitable @columnfractions .30 .25 .45 +@item +@sc{Access Trends} @tab @sc{Table Type} @tab @sc{Reason} +@item +Read-only +@tab @code{NTST} +@tab Less overhead and faster +@item +Critical data +@tab @code{GEMINI} +@tab Crash recovery protection +@item +High concurrency +@tab @code{GEMINI} +@tab Row-level locking +@item +Heavy update +@tab @code{GEMINI} +@tab Row-level locking +@end multitable + +The table below shows how a typical application schema could be defined. 
+ +@multitable @columnfractions .15 .30 .25 .30 +@item +@sc{Table} @tab @sc{Contents} @tab @sc{Table Type} @tab @sc{Reason} +@item +account +@tab Customer account data +@tab @code{GEMINI} +@tab Critical data, heavy update +@item +order +@tab Orders for a customer +@tab @code{GEMINI} +@tab Critical data, heavy update +@item +orderline +@tab Orderline detail for an order +@tab @code{GEMINI} +@tab Critical data, heavy update +@item +invdesc +@tab Inventory description +@tab @code{NTST} +@tab Read-only, frequent access +@item +salesrep +@tab Sales rep information +@tab @code{NTST} +@tab Infrequent update +@item +inventory +@tab Inventory information +@tab @code{GEMINI} +@tab High concurrency, critical data +@item +config +@tab System configuration +@tab @code{NTST} +@tab Read-only +@end multitable @cindex tutorial @cindex terminal monitor, defined @@ -29308,8 +29336,9 @@ your changes with the new @file{errmsg.txt} file. @node Character sets, Adding character set, Languages, Languages @subsection The Character Set Used for Data and Sorting -By default, @strong{MySQL} uses the ISO-8859-1 (Latin1) character -set. This is the character set used in the USA and western Europe. +By default, @strong{MySQL} uses the ISO-8859-1 (Latin1) character set +with sorting according to Swedish/Finnish. This is the character set suitable +in the USA and western Europe. All standard @strong{MySQL} binaries are compiled with @code{--with-extra-charsets=complex}. This will add code to all @@ -29321,12 +29350,12 @@ The character set determines what characters are allowed in names and how things are sorted by the @code{ORDER BY} and @code{GROUP BY} clauses of the @code{SELECT} statement. -You can change the character set with the -@code{--default-character-set} option when you start the server. -The character sets available depend on the @code{--with-charset=charset} -option to @code{configure}, and the character set configuration files -listed in @file{SHAREDIR/charsets/Index}. 
-@xref{Quick install}. +You can change the character set with the @code{--default-character-set} +option when you start the server. The character sets available depend +on the @code{--with-charset=charset} and @code{--with-extra-charsets= +list-of-charset | complex | all} options to @code{configure}, and the +character set configuration files listed in +@file{SHAREDIR/charsets/Index}. @xref{configure options}. If you change the character set when running @strong{MySQL} (which may also change the sort order), you must run myisamchk -r -q on all @@ -34883,6 +34912,10 @@ After you have installed the packed table into the @strong{MySQL} database directory you should do @code{mysqladmin flush-tables} to force @code{mysqld} to start using the new table. +If you want to unpack a packed table, you can do this with the +@code{--unpack} option to @code{isamchk} or @code{myisamchk}. + + @cindex installation maintenance @cindex maintaining, tables @cindex tables, maintaining @@ -46301,6 +46334,12 @@ Our TODO section contains what we plan to have in 4.0. @xref{TODO MySQL 4.0}. @itemize @bullet @item +Renamed @code{safe_mysqld} to @code{mysqld_safe}. +@item +Allow one to use @code{IN} instead of @code{FROM} in @code{SHOW} commands. +@item +@code{SHOW INDEXES} is now a synonym for @code{SHOW INDEX}. +@item Added support for symbolic links to @code{MyISAM} tables. Symlink handling is now enabled by default for Windows. @item @@ -46422,6 +46461,8 @@ not yet 100% confident in this code. @appendixsubsec Changes in release 3.23.40 @itemize @bullet @item +Option @code{--force} to @code{myisamchk} now also updates states. +@item Added option @code{--warnings} to @code{mysqld}. Now @code{mysqld} only prints the error @code{Aborted connection} if this option is used.
@item diff --git a/myisam/myisamchk.c b/myisam/myisamchk.c index 06d34c10659..dd23e214ccf 100644 --- a/myisam/myisamchk.c +++ b/myisam/myisamchk.c @@ -201,7 +201,7 @@ static struct option long_options[] = static void print_version(void) { - printf("%s Ver 1.46 for %s at %s\n",my_progname,SYSTEM_TYPE, + printf("%s Ver 1.47 for %s at %s\n",my_progname,SYSTEM_TYPE, MACHINE_TYPE); } @@ -234,7 +234,8 @@ static void usage(void) -F, --fast Check only tables that hasn't been closed properly\n\ -C, --check-only-changed\n\ Check only tables that has changed since last check\n\ - -f, --force Restart with -r if there are any errors in the table\n\ + -f, --force Restart with -r if there are any errors in the table.\n\ + States will be updated as with --update-state\n\ -i, --information Print statistics information about table that is checked\n\ -m, --medium-check Faster than extended-check, but only finds 99.99% of\n\ all errors. Should be good enough for most cases\n\ @@ -359,7 +360,7 @@ static void get_options(register int *argc,register char ***argv) break; case 'f': check_param.tmpfile_createflag= O_RDWR | O_TRUNC; - check_param.testflag|=T_FORCE_CREATE; + check_param.testflag|= T_FORCE_CREATE | T_UPDATE_STATE; break; case 'F': check_param.testflag|=T_FAST; diff --git a/sql/share/italian/errmsg.txt b/sql/share/italian/errmsg.txt index ab31fa279c4..b85dc03286a 100644 --- a/sql/share/italian/errmsg.txt +++ b/sql/share/italian/errmsg.txt @@ -199,15 +199,15 @@ "La tabella '%-.64s' e` segnalata come corrotta e l'ultima ricostruzione (automatica?) 
e` fallita", "Attenzione: Alcune delle modifiche alle tabelle non transazionali non possono essere ripristinate (roll back impossibile)", "La transazione a comandi multipli (multi-statement) ha richiesto piu` di 'max_binlog_cache_size' bytes di disco: aumentare questa variabile di mysqld e riprovare', -"This operation cannot be performed with a running slave, run SLAVE STOP first", -"This operation requires a running slave, configure slave and do SLAVE START", -"The server is not configured as slave, fix in config file or with CHANGE MASTER TO", -"Could not initialize master info structure, check permisions on master.info", -"Could not create slave thread, check system resources", -"User %-.64s has already more than 'max_user_connections' active connections", -"You may only use constant expressions with SET", -"Lock wait timeout exceeded", -"The total number of locks exceeds the lock table size", -"Update locks cannot be acquired during a READ UNCOMMITTED transaction", +"Questa operazione non puo' essere eseguita con un database 'slave' che gira, lanciare prima SLAVE STOP", +"Questa operazione richiede un database 'slave', configurarlo ed eseguire SLAVE START", +"Il server non e' configurato come 'slave', correggere il file di configurazione cambiando CHANGE MASTER TO", +"Impossibile inizializzare la struttura 'master info', controllare i permessi sul file master.info", +"Impossibile creare il thread 'slave', controllare le risorse di sistema", +"L'utente %-.64s ha gia' piu' di 'max_user_connections' connessioni attive", +"Si possono usare solo espressioni costanti con SET", +"E' scaduto il timeout per l'attesa del lock", +"Il numero totale di lock e' maggiore della grandezza della tabella di lock", +"I lock di aggiornamento non possono essere acquisiti durante una transazione 'READ UNCOMMITTED'", "DROP DATABASE not allowed while thread is holding global read lock", "CREATE DATABASE not allowed while thread is holding global read lock", From 
9665a34a73b42209251759b32c003c0a01bfae2d Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 28 Jun 2001 15:24:28 +0300 Subject: [PATCH 11/11] Allow floats of type 1.0e1 --- Docs/manual.texi | 2 ++ mysql-test/t/type_float.test | 3 ++- sql/sql_lex.cc | 9 +++------ 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Docs/manual.texi b/Docs/manual.texi index 6843438b416..12c7df0679c 100644 --- a/Docs/manual.texi +++ b/Docs/manual.texi @@ -46461,6 +46461,8 @@ not yet 100% confident in this code. @appendixsubsec Changes in release 3.23.40 @itemize @bullet @item +Fixed parser to allow floats of type @code{1.0e1} (no sign after @code{e}). +@item Option @code{--force} to @code{myisamchk} now also updates states. @item Added option @code{--warnings} to @code{mysqld}. Now @code{mysqld} diff --git a/mysql-test/t/type_float.test b/mysql-test/t/type_float.test index 1496170a256..23941ad2913 100644 --- a/mysql-test/t/type_float.test +++ b/mysql-test/t/type_float.test @@ -3,7 +3,8 @@ # Numeric floating point. SELECT 10,10.0,10.,.1e+2,100.0e-1; -select 6e-05, -6e-05, --6e-05, -6e-05+1.000000; +SELECT 6e-05, -6e-05, --6e-05, -6e-05+1.000000; +SELECT 1e1,1.e1,1.0e1,1e+1,1.e+1,1.0e+1,1e-1,1.e-1,1.0e-1; drop table if exists t1; create table t1 (f1 float(24),f2 float(52)); diff --git a/sql/sql_lex.cc b/sql/sql_lex.cc index b8d2ee13b0e..a78fef62657 100644 --- a/sql/sql_lex.cc +++ b/sql/sql_lex.cc @@ -650,12 +650,9 @@ int yylex(void *arg) if (c == 'e' || c == 'E') { c = yyGet(); - if (c != '-' && c != '+' && !isdigit(c)) - { // No exp sig found - state= STATE_CHAR; - break; - } - if (!isdigit(yyGet())) + if (c == '-' || c == '+') + c = yyGet(); // Skip sign + if (!isdigit(c)) { // No digit after sign state= STATE_CHAR; break;