This commit is contained in:
sasha@mysql.sashanet.com 2001-08-09 19:16:15 -06:00
commit f6836bf81a
102 changed files with 3488 additions and 624 deletions

View File

@ -138,13 +138,19 @@ bdb/test/logtrack.list
bdb/txn/txn_auto.c
binary/*
client/insert_test
client/log_event.cc
client/log_event.h
client/mf_iocache.c
client/mf_iocache.cc
client/mysql
client/mysqladmin
client/mysqlbinlog
client/mysqlcheck
client/mysqldump
client/mysqlimport
client/mysqlshow
client/mysqltest
client/mysys_priv.h
client/select_test
client/thimble
client/thread_test
@ -254,6 +260,7 @@ libmysqld/sql_string.cc
libmysqld/sql_table.cc
libmysqld/sql_test.cc
libmysqld/sql_udf.cc
libmysqld/sql_union.cc
libmysqld/sql_unions.cc
libmysqld/sql_update.cc
libmysqld/sql_yacc.cc
@ -290,6 +297,7 @@ mysql-test/r/*.reject
mysql-test/r/rpl_log.eval
mysql-test/share/mysql
mysql-test/var/*
mysql.kdevprj
mysql.proj
mysqld.S
mysqld.sym
@ -376,4 +384,3 @@ support-files/mysql.spec
tags
tmp/*
vio/viotest-ssl
libmysqld/sql_union.cc

0
Docs/Flags/indonesia.eps Normal file
View File

BIN
Docs/Flags/indonesia.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 133 B

0
Docs/Flags/indonesia.txt Normal file
View File

View File

BIN
Docs/Flags/yugoslavia.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 250 B

View File

View File

@ -40,6 +40,7 @@ linked_include_sources:
echo timestamp > linked_include_sources
linked_client_sources: @linked_client_targets@
cd client; $(MAKE) link_sources
echo timestamp > linked_client_sources
linked_libmysql_sources:

View File

@ -20,8 +20,9 @@ INCLUDES = -I$(srcdir)/../include $(openssl_includes) \
-I../include -I$(srcdir)/.. -I$(top_srcdir) \
-I..
LIBS = @CLIENT_LIBS@
LDADD = @CLIENT_EXTRA_LDFLAGS@ ../libmysql/libmysqlclient.la
bin_PROGRAMS = mysql mysqladmin mysqlcheck mysqlshow mysqldump mysqlimport mysqltest
LDADD = @CLIENT_EXTRA_LDFLAGS@ ../libmysql/libmysqlclient.la
bin_PROGRAMS = mysql mysqladmin mysqlcheck mysqlshow \
mysqldump mysqlimport mysqltest mysqlbinlog
noinst_PROGRAMS = insert_test select_test thread_test
noinst_HEADERS = sql_string.h completion_hash.h my_readline.h
mysql_SOURCES = mysql.cc readline.cc sql_string.cc completion_hash.cc
@ -36,10 +37,24 @@ insert_test_DEPENDENCIES= $(LIBRARIES) $(pkglib_LTLIBRARIES)
select_test_DEPENDENCIES= $(LIBRARIES) $(pkglib_LTLIBRARIES)
mysqltest_SOURCES= mysqltest.c
mysqltest_DEPENDENCIES= $(LIBRARIES) $(pkglib_LTLIBRARIES)
mysqlbinlog_SOURCES = mysqlbinlog.cc
mysqlbinlog_DEPENDENCIES= $(LIBRARIES) $(pkglib_LTLIBRARIES)
sql_src=log_event.h log_event.cc
mysys_src=mysys_priv.h
# Fix for mit-threads
DEFS = -DUNDEF_THREADS_HACK
link_sources:
for f in $(sql_src) ; do \
rm -f $$f; \
@LN_CP_F@ ../sql/$$f $$f; \
done; \
for f in $(mysys_src); do \
rm -f $$f; \
@LN_CP_F@ ../mysys/$$f $$f; \
done;
thread_test.o: thread_test.c
$(COMPILE) -c @MT_INCLUDES@ $(INCLUDES) $<

View File

@ -22,13 +22,19 @@
#include <my_sys.h>
#include <getopt.h>
#include <thr_alarm.h>
#define MYSQL_SERVER // We want the C++ version of net
#include <mysql.h>
#include "log_event.h"
#include "mini_client.h"
#define CLIENT_CAPABILITIES (CLIENT_LONG_PASSWORD | CLIENT_LONG_FLAG | CLIENT_LOCAL_FILES)
extern "C"
{
int simple_command(MYSQL *mysql,enum enum_server_command command,
const char *arg,
uint length, my_bool skipp_check);
int net_safe_read(MYSQL* mysql);
}
char server_version[SERVER_VERSION_LENGTH];
uint32 server_id = 0;
@ -108,7 +114,7 @@ static void die(const char* fmt, ...)
static void print_version()
{
printf("%s Ver 1.4 for %s at %s\n",my_progname,SYSTEM_TYPE, MACHINE_TYPE);
printf("%s Ver 1.5 for %s at %s\n",my_progname,SYSTEM_TYPE, MACHINE_TYPE);
}
@ -248,12 +254,12 @@ static int parse_args(int *argc, char*** argv)
static MYSQL* safe_connect()
{
MYSQL *local_mysql = mc_mysql_init(NULL);
MYSQL *local_mysql = mysql_init(NULL);
if(!local_mysql)
die("Failed on mc_mysql_init");
die("Failed on mysql_init");
if(!mc_mysql_connect(local_mysql, host, user, pass, 0, port, 0, 0))
die("failed on connect: %s", mc_mysql_error(local_mysql));
if(!mysql_real_connect(local_mysql, host, user, pass, 0, port, 0, 0))
die("failed on connect: %s", mysql_error(local_mysql));
return local_mysql;
}
@ -281,7 +287,7 @@ static void dump_remote_table(NET* net, const char* db, const char* table)
*p++ = table_len;
memcpy(p, table, table_len);
if(mc_simple_command(mysql, COM_TABLE_DUMP, buf, p - buf + table_len, 1))
if(simple_command(mysql, COM_TABLE_DUMP, buf, p - buf + table_len, 1))
die("Error sending the table dump command");
for(;;)
@ -314,14 +320,14 @@ static void dump_remote_log_entries(const char* logname)
len = (uint) strlen(logname);
int4store(buf + 6, 0);
memcpy(buf + 10, logname,len);
if(mc_simple_command(mysql, COM_BINLOG_DUMP, buf, len + 10, 1))
if(simple_command(mysql, COM_BINLOG_DUMP, buf, len + 10, 1))
die("Error sending the log dump command");
for(;;)
{
len = mc_net_safe_read(mysql);
len = net_safe_read(mysql);
if (len == packet_error)
die("Error reading packet from server: %s", mc_mysql_error(mysql));
die("Error reading packet from server: %s", mysql_error(mysql));
if(len == 1 && net->read_pos[0] == 254)
break; // end of data
DBUG_PRINT("info",( "len= %u, net->read_pos[5] = %d\n",
@ -391,7 +397,7 @@ static void dump_local_log_entries(const char* logname)
char llbuff[21];
my_off_t old_off = my_b_tell(file);
Log_event* ev = Log_event::read_log_event(file, 0);
Log_event* ev = Log_event::read_log_event(file);
if (!ev)
{
if (file->error)
@ -430,9 +436,6 @@ int main(int argc, char** argv)
if(use_remote)
{
#ifndef __WIN__
init_thr_alarm(10); // need to do this manually
#endif
mysql = safe_connect();
}
@ -457,7 +460,7 @@ int main(int argc, char** argv)
if (result_file != stdout)
my_fclose(result_file, MYF(0));
if (use_remote)
mc_mysql_close(mysql);
mysql_close(mysql);
return 0;
}

View File

@ -48,17 +48,19 @@ static MYSQL mysql_connection;
static char *opt_password=0, *current_user=0,
*current_host=0, *current_db=0, *fields_terminated=0,
*lines_terminated=0, *enclosed=0, *opt_enclosed=0,
*escaped=0, opt_low_priority=0, *opt_columns=0;
*escaped=0, opt_low_priority=0, *opt_columns=0,
*default_charset;
static uint opt_mysql_port=0;
static my_string opt_mysql_unix_port=0;
#include "sslopt-vars.h"
enum options {OPT_FTB=256, OPT_LTB, OPT_ENC, OPT_O_ENC, OPT_ESC,
OPT_LOW_PRIORITY, OPT_CHARSETS_DIR};
OPT_LOW_PRIORITY, OPT_CHARSETS_DIR, OPT_DEFAULT_CHARSET};
static struct option long_options[] =
{
{"character-sets-dir", required_argument, 0, OPT_CHARSETS_DIR},
{"default-character-set", required_argument, 0, OPT_DEFAULT_CHARSET},
{"columns", required_argument, 0, 'c'},
{"compress", no_argument, 0, 'C'},
{"debug", optional_argument, 0, '#'},
@ -119,6 +121,8 @@ file. The SQL command 'LOAD DATA INFILE' is used to import the rows.\n");
printf("\n\
-#, --debug[=...] Output debug log. Often this is 'd:t:o,filename`\n\
-?, --help Displays this help and exits.\n\
--default-character-set=...\n\
Set the default character set.\n\
--character-sets-dir=...\n\
Directory where character sets are\n\
-c, --columns=... Use only these columns to import the data to.\n\
@ -179,6 +183,9 @@ static int get_options(int *argc, char ***argv)
case 'C':
opt_compress=1;
break;
case OPT_DEFAULT_CHARSET:
default_charset= optarg;
break;
case OPT_CHARSETS_DIR:
charsets_dir= optarg;
break;
@ -269,6 +276,11 @@ static int get_options(int *argc, char ***argv)
fprintf(stderr, "You can't use --ignore (-i) and --replace (-r) at the same time.\n");
return(1);
}
if (default_charset)
{
if (set_default_charset_by_name(default_charset, MYF(MY_WME)))
exit(1);
}
(*argc)-=optind;
(*argv)+=optind;
if (*argc < 2)

View File

@ -71,30 +71,6 @@ btr_page_create(
dict_tree_t* tree, /* in: index tree */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Allocates a new file page to be used in an index tree. */
static
page_t*
btr_page_alloc(
/*===========*/
/* out: new allocated page,
x-latched */
dict_tree_t* tree, /* in: index tree */
ulint hint_page_no, /* in: hint of a good page */
byte file_direction, /* in: direction where a possible
page split is made */
ulint level, /* in: level where the page is placed
in the tree */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Frees a file page used in an index tree. */
static
void
btr_page_free(
/*==========*/
dict_tree_t* tree, /* in: index tree */
page_t* page, /* in, own: page to be freed */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Sets the child node file address in a node pointer. */
UNIV_INLINE
void
@ -319,11 +295,12 @@ btr_page_alloc_for_ibuf(
/******************************************************************
Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents! */
static
page_t*
btr_page_alloc(
/*===========*/
/* out: new allocated page, x-latched */
/* out: new allocated page, x-latched;
NULL if out of space */
dict_tree_t* tree, /* in: index tree */
ulint hint_page_no, /* in: hint of a good page */
byte file_direction, /* in: direction where a possible
@ -358,7 +335,10 @@ btr_page_alloc(
new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no,
file_direction, TRUE, mtr);
ut_a(new_page_no != FIL_NULL);
if (new_page_no == FIL_NULL) {
return(NULL);
}
new_page = buf_page_get(dict_tree_get_space(tree), new_page_no,
RW_X_LATCH, mtr);
@ -435,20 +415,22 @@ btr_page_free_for_ibuf(
}
/******************************************************************
Frees a file page used in an index tree. */
static
Frees a file page used in an index tree. Can also be used to free (BLOB)
external storage pages, because the page level 0 can be given as an
argument. */
void
btr_page_free(
/*==========*/
btr_page_free_low(
/*==============*/
dict_tree_t* tree, /* in: index tree */
page_t* page, /* in: page to be freed, x-latched */
ulint level, /* in: page level */
mtr_t* mtr) /* in: mtr */
{
fseg_header_t* seg_header;
page_t* root;
ulint space;
ulint page_no;
ulint level;
ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
MTR_MEMO_PAGE_X_FIX));
@ -465,8 +447,6 @@ btr_page_free(
}
root = btr_root_get(tree, mtr);
level = btr_page_get_level(page, mtr);
if (level == 0) {
seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF;
@ -480,6 +460,26 @@ btr_page_free(
fseg_free_page(seg_header, space, page_no, mtr);
}
/******************************************************************
Releases a file page that belongs to an index tree. The page level is
read from the page itself, so this routine must NOT be used for field
external storage pages, which do not carry that information. */

void
btr_page_free(
/*==========*/
	dict_tree_t*	tree,	/* in: index tree */
	page_t*		page,	/* in: page to be freed, x-latched */
	mtr_t*		mtr)	/* in: mtr */
{
	/* The page must be x-fixed in the memo of this mtr */
	ut_ad(mtr_memo_contains(mtr, buf_block_align(page),
						MTR_MEMO_PAGE_X_FIX));

	/* Look up the level stored on the page and hand everything over
	to the level-explicit variant */
	btr_page_free_low(tree, page, btr_page_get_level(page, mtr), mtr);
}
/******************************************************************
Sets the child node file address in a node pointer. */
UNIV_INLINE
@ -1276,6 +1276,7 @@ btr_insert_on_non_leaf_level(
dtuple_t* tuple, /* in: the record to be inserted */
mtr_t* mtr) /* in: mtr */
{
big_rec_t* dummy_big_rec;
btr_cur_t cursor;
ulint err;
rec_t* rec;
@ -1294,7 +1295,7 @@ btr_insert_on_non_leaf_level(
| BTR_KEEP_SYS_FLAG
| BTR_NO_UNDO_LOG_FLAG,
&cursor, tuple,
&rec, NULL, mtr);
&rec, &dummy_big_rec, NULL, mtr);
ut_a(err == DB_SUCCESS);
}

File diff suppressed because it is too large Load Diff

View File

@ -216,14 +216,44 @@ buf_calc_page_checksum(
/* out: checksum */
byte* page) /* in: buffer page */
{
ulint checksum;
ulint checksum;
checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
+ ut_fold_binary(page + FIL_PAGE_DATA, UNIV_PAGE_SIZE - FIL_PAGE_DATA
- FIL_PAGE_END_LSN);
checksum = checksum & 0xFFFFFFFF;
checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN);
+ ut_fold_binary(page + FIL_PAGE_DATA,
UNIV_PAGE_SIZE - FIL_PAGE_DATA
- FIL_PAGE_END_LSN);
checksum = checksum & 0xFFFFFFFF;
return(checksum);
return(checksum);
}
/************************************************************************
Decides whether a database page read from disk looks corrupt, by
comparing the LSN stored in the page header with the LSN field stored at
the end of the page and with the computed page checksum. */

ibool
buf_page_is_corrupted(
/*==================*/
				/* out: TRUE if corrupted */
	byte*	read_buf)	/* in: a database page */
{
	ulint	checksum;

	checksum = buf_calc_page_checksum(read_buf);

	/* The second 4-byte word of the header LSN must always equal the
	second 4-byte word of the end LSN field */
	if (mach_read_from_4(read_buf + FIL_PAGE_LSN + 4)
	    != mach_read_from_4(read_buf + UNIV_PAGE_SIZE
					- FIL_PAGE_END_LSN + 4)) {

		return(TRUE);
	}

	/* The first 4-byte word of the end LSN field is accepted if it
	holds the page checksum ... */
	if (checksum == mach_read_from_4(read_buf + UNIV_PAGE_SIZE
					- FIL_PAGE_END_LSN)) {

		return(FALSE);
	}

	/* ... or if it equals the first 4-byte word of the header LSN */
	if (mach_read_from_4(read_buf + FIL_PAGE_LSN)
	    == mach_read_from_4(read_buf + UNIV_PAGE_SIZE
					- FIL_PAGE_END_LSN)) {

		return(FALSE);
	}

	return(TRUE);
}
/************************************************************************
@ -1265,34 +1295,22 @@ buf_page_io_complete(
dulint id;
dict_index_t* index;
ulint io_type;
ulint checksum;
ut_ad(block);
io_type = block->io_fix;
if (io_type == BUF_IO_READ) {
checksum = buf_calc_page_checksum(block->frame);
/* From version 3.23.38 up we store the page checksum
to the 4 upper bytes of the page end lsn field */
if ((mach_read_from_4(block->frame + FIL_PAGE_LSN + 4)
!= mach_read_from_4(block->frame + UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN + 4))
|| (checksum != mach_read_from_4(block->frame
+ UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN)
&& mach_read_from_4(block->frame + FIL_PAGE_LSN)
!= mach_read_from_4(block->frame
+ UNIV_PAGE_SIZE
- FIL_PAGE_END_LSN))) {
fprintf(stderr,
if (buf_page_is_corrupted(block->frame)) {
fprintf(stderr,
"InnoDB: Database page corruption or a failed\n"
"InnoDB: file read of page %lu.\n", block->offset);
fprintf(stderr,
fprintf(stderr,
"InnoDB: You may have to recover from a backup.\n");
exit(1);
exit(1);
}
if (recv_recovery_is_on()) {
@ -1601,11 +1619,28 @@ void
buf_print_io(void)
/*==============*/
{
ulint size;
ut_ad(buf_pool);
mutex_enter(&(buf_pool->mutex));
size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE;
printf("pages read %lu, created %lu, written %lu\n",
mutex_enter(&(buf_pool->mutex));
printf("LRU list length %lu \n", UT_LIST_GET_LEN(buf_pool->LRU));
printf("Free list length %lu \n", UT_LIST_GET_LEN(buf_pool->free));
printf("Flush list length %lu \n",
UT_LIST_GET_LEN(buf_pool->flush_list));
printf("Buffer pool size in pages %lu\n", size);
printf("Pending reads %lu \n", buf_pool->n_pend_reads);
printf("Pending writes: LRU %lu, flush list %lu, single page %lu\n",
buf_pool->n_flush[BUF_FLUSH_LRU],
buf_pool->n_flush[BUF_FLUSH_LIST],
buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]);
printf("Pages read %lu, created %lu, written %lu\n",
buf_pool->n_pages_read, buf_pool->n_pages_created,
buf_pool->n_pages_written);
mutex_exit(&(buf_pool->mutex));

View File

@ -1,7 +1,7 @@
/******************************************************
The database buffer buf_pool flush algorithm
(c) 1995 Innobase Oy
(c) 1995-2001 Innobase Oy
Created 11/11/1995 Heikki Tuuri
*******************************************************/
@ -15,13 +15,13 @@ Created 11/11/1995 Heikki Tuuri
#include "ut0byte.h"
#include "ut0lst.h"
#include "fil0fil.h"
#include "buf0buf.h"
#include "buf0lru.h"
#include "buf0rea.h"
#include "ibuf0ibuf.h"
#include "log0log.h"
#include "os0file.h"
#include "trx0sys.h"
/* When flushed, dirty blocks are searched in neighborhoods of this size, and
flushed along with the original page. */
@ -195,9 +195,145 @@ buf_flush_write_complete(
}
/************************************************************************
Flushes possible buffered writes from the doublewrite memory buffer to disk,
and also wakes up the aio thread if simulated aio is used. It is very
important to call this function after a batch of writes has been posted,
and also when we may have to wait for a page latch! Otherwise a deadlock
of threads can occur.

The sequence is: (1) write the buffered pages to the doublewrite blocks
block1 and block2 of TRX_SYS_SPACE, (2) flush TRX_SYS_SPACE, (3) post the
same pages to their intended positions, (4) wait until those writes have
been posted and flush the tablespaces, (5) mark the memory buffer empty.
The doublewrite mutex is held for the whole sequence. */
static
void
buf_flush_buffered_writes(void)
/*===========================*/
{
buf_block_t* block;
ulint len;
ulint i;
/* No doublewrite buffer in use: there is nothing buffered here, so
only the simulated aio handler threads need to be woken up */
if (trx_doublewrite == NULL) {
os_aio_simulated_wake_handler_threads();
return;
}
mutex_enter(&(trx_doublewrite->mutex));
/* Write first to doublewrite buffer blocks. We use synchronous
aio and thus know that file write has been completed when the
control returns. */
/* first_free is the number of pages currently buffered; zero means
there is nothing to write.
NOTE(review): this path returns without waking the simulated aio
handler threads -- presumably fine because no writes are pending,
but confirm against callers that relied on the wake-up */
if (trx_doublewrite->first_free == 0) {
mutex_exit(&(trx_doublewrite->mutex));
return;
}
/* The first TRX_SYS_DOUBLEWRITE_BLOCK_SIZE buffered pages go to
block1; compute how many bytes of the memory buffer that is */
if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
len = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
} else {
len = trx_doublewrite->first_free * UNIV_PAGE_SIZE;
}
fil_io(OS_FILE_WRITE,
TRUE, TRX_SYS_SPACE,
trx_doublewrite->block1, 0, len,
(void*)trx_doublewrite->write_buf, NULL);
/* Any pages beyond the first block's capacity go to block2, taken
from the memory buffer right after the part written to block1 */
if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
len = (trx_doublewrite->first_free
- TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE;
fil_io(OS_FILE_WRITE,
TRUE, TRX_SYS_SPACE,
trx_doublewrite->block2, 0, len,
(void*)(trx_doublewrite->write_buf
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE),
NULL);
}
/* Now flush the doublewrite buffer data to disk */
fil_flush(TRX_SYS_SPACE);
/* We know that the writes have been flushed to disk now
and in recovery we will find them in the doublewrite buffer
blocks. Next do the writes to the intended positions. */
/* Each page is written from its own buffer frame (block->frame),
not from the copy held in the doublewrite memory buffer */
for (i = 0; i < trx_doublewrite->first_free; i++) {
block = trx_doublewrite->buf_block_arr[i];
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
(void*)block->frame, (void*)block);
}
/* Wake possible simulated aio thread to actually post the
writes to the operating system */
os_aio_simulated_wake_handler_threads();
/* Wait that all async writes to tablespaces have been posted to
the OS */
os_aio_wait_until_no_pending_writes();
/* Now we flush the data to disk (for example, with fsync) */
fil_flush_file_spaces(FIL_TABLESPACE);
/* We can now reuse the doublewrite memory buffer: */
trx_doublewrite->first_free = 0;
mutex_exit(&(trx_doublewrite->mutex));
}
/************************************************************************
Queues a buffer page for writing through the doublewrite memory buffer.
If the memory buffer is already full, buf_flush_buffered_writes is called
to drain it before the page is added. If adding the page fills the
buffer, it is drained again before returning. */
static
void
buf_flush_post_to_doublewrite_buf(
/*==============================*/
	buf_block_t*	block)	/* in: buffer block to write */
{
	mutex_enter(&(trx_doublewrite->mutex));

	/* Drain the buffer until a slot is free. The mutex is released
	around the flush, which acquires it itself, and the fill level is
	rechecked once the mutex has been taken again */
	while (trx_doublewrite->first_free
				>= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {

		mutex_exit(&(trx_doublewrite->mutex));

		buf_flush_buffered_writes();

		mutex_enter(&(trx_doublewrite->mutex));
	}

	/* Copy the page image to the next free slot and remember which
	block it came from */
	ut_memcpy(trx_doublewrite->write_buf
				+ UNIV_PAGE_SIZE * trx_doublewrite->first_free,
			block->frame, UNIV_PAGE_SIZE);

	trx_doublewrite->buf_block_arr[trx_doublewrite->first_free] = block;

	trx_doublewrite->first_free++;

	if (trx_doublewrite->first_free
				< 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {

		/* Still room left: nothing more to do for now */
		mutex_exit(&(trx_doublewrite->mutex));

		return;
	}

	/* This page took the last slot: write the whole batch out */
	mutex_exit(&(trx_doublewrite->mutex));

	buf_flush_buffered_writes();
}
/************************************************************************
Does an asynchronous write of a buffer page. NOTE: in simulated aio and
also when the doublewrite buffer is used, we must call
buf_flush_buffered_writes after we have posted a batch of writes! */
static
void
buf_flush_write_block_low(
@ -222,15 +358,24 @@ buf_flush_write_block_low(
mach_write_to_8(block->frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN,
block->newest_modification);
/* Write to the page the space id and page number */
mach_write_to_4(block->frame + FIL_PAGE_SPACE, block->space);
mach_write_to_4(block->frame + FIL_PAGE_OFFSET, block->offset);
/* We overwrite the first 4 bytes of the end lsn field to store
a page checksum */
mach_write_to_4(block->frame + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN,
buf_calc_page_checksum(block->frame));
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
if (!trx_doublewrite) {
fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE,
(void*)block->frame, (void*)block);
} else {
buf_flush_post_to_doublewrite_buf(block);
}
}
/************************************************************************
@ -251,14 +396,14 @@ buf_flush_try_page(
buf_block_t* block;
ibool locked;
ut_ad((flush_type == BUF_FLUSH_LRU) || (flush_type == BUF_FLUSH_LIST)
|| (flush_type == BUF_FLUSH_SINGLE_PAGE));
ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST
|| flush_type == BUF_FLUSH_SINGLE_PAGE);
mutex_enter(&(buf_pool->mutex));
block = buf_page_hash_get(space, offset);
if ((flush_type == BUF_FLUSH_LIST)
if (flush_type == BUF_FLUSH_LIST
&& block && buf_flush_ready_for_flush(block, flush_type)) {
block->io_fix = BUF_IO_WRITE;
@ -286,7 +431,7 @@ buf_flush_try_page(
mutex_exit(&(buf_pool->mutex));
if (!locked) {
os_aio_simulated_wake_handler_threads();
buf_flush_buffered_writes();
rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE);
}
@ -300,7 +445,7 @@ buf_flush_try_page(
return(1);
} else if ((flush_type == BUF_FLUSH_LRU) && block
} else if (flush_type == BUF_FLUSH_LRU && block
&& buf_flush_ready_for_flush(block, flush_type)) {
/* VERY IMPORTANT:
@ -328,7 +473,7 @@ buf_flush_try_page(
return(1);
} else if ((flush_type == BUF_FLUSH_SINGLE_PAGE) && block
} else if (flush_type == BUF_FLUSH_SINGLE_PAGE && block
&& buf_flush_ready_for_flush(block, flush_type)) {
block->io_fix = BUF_IO_WRITE;
@ -385,6 +530,14 @@ buf_flush_try_neighbors(
/* If there is little space, it is better not to flush any
block except from the end of the LRU list */
low = offset;
high = offset + 1;
} else if (flush_type == BUF_FLUSH_LIST) {
/* Since semaphore waits require us to flush the
doublewrite buffer to disk, it is best that the
search area is just the page itself, to minimize
chances for semaphore waits */
low = offset;
high = offset + 1;
}
@ -418,13 +571,6 @@ buf_flush_try_neighbors(
mutex_exit(&(buf_pool->mutex));
/* In simulated aio we wake up the i/o-handler threads now that
we have posted a batch of writes: */
/* printf("Flush count %lu ; Waking i/o handlers\n", count); */
os_aio_simulated_wake_handler_threads();
return(count);
}
@ -565,13 +711,15 @@ buf_flush_batch(
mutex_exit(&(buf_pool->mutex));
if (buf_debug_prints && (page_count > 0)) {
buf_flush_buffered_writes();
if (buf_debug_prints && page_count > 0) {
if (flush_type == BUF_FLUSH_LRU) {
printf("To flush %lu pages in LRU flush\n",
printf("Flushed %lu pages in LRU flush\n",
page_count);
} else if (flush_type == BUF_FLUSH_LIST) {
printf("To flush %lu pages in flush list flush\n",
page_count, flush_type);
printf("Flushed %lu pages in flush list flush\n",
page_count);
} else {
ut_error;
}

View File

@ -49,7 +49,9 @@ ulint
buf_read_page_low(
/*==============*/
/* out: 1 if a read request was queued, 0 if the page
already resided in buf_pool */
already resided in buf_pool or if the page is in
the doublewrite buffer blocks in which case it is never
read into the pool */
ibool sync, /* in: TRUE if synchronous aio is desired */
ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ...,
ORed to OS_AIO_SIMULATED_WAKE_LATER (see below
@ -63,6 +65,16 @@ buf_read_page_low(
wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER;
mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER;
if (trx_doublewrite && space == TRX_SYS_SPACE
&& ( (offset >= trx_doublewrite->block1
&& offset < trx_doublewrite->block1
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
|| (offset >= trx_doublewrite->block2
&& offset < trx_doublewrite->block2
+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE))) {
return(0);
}
#ifdef UNIV_LOG_DEBUG
if (space % 2 == 1) {
/* We are updating a replicate space while holding the

View File

@ -13,7 +13,10 @@ Created 5/30/1994 Heikki Tuuri
#endif
#include "ut0rnd.h"
#include "rem0rec.h"
#include "page0page.h"
#include "dict0dict.h"
#include "btr0cur.h"
byte data_error; /* data pointers of tuple fields are initialized
to point here for error checking */
@ -378,6 +381,172 @@ dtuple_sprintf(
return(len);
}
/******************************************************************
Moves parts of long fields in entry to the big record vector so that
the size of tuple drops below the maximum record size allowed in the
database. Moves data only from those fields which are not necessary
to determine uniquely the insertion place of the tuple in the index.

The field data of entry is modified in place: the moved bytes are taken
from the end of the field and a zero-filled extern field reference of
BTR_EXTERN_FIELD_REF_SIZE bytes is left as the new end of the field.
If the entry cannot be shortened enough, every move already made is
undone, so that entry is returned to the caller unchanged. */

big_rec_t*
dtuple_convert_big_rec(
/*===================*/
				/* out, own: created big record vector,
				NULL if we are not able to shorten
				the entry enough, i.e., if there are
				too many short fields in entry; in the
				NULL case entry is left as it was */
	dict_index_t*	index,	/* in: index */
	dtuple_t*	entry)	/* in: index entry; shortened in place
				on success */
{
	mem_heap_t*	heap;
	big_rec_t*	vector;
	dfield_t*	dfield;
	ulint		size;
	ulint		n_fields;
	ulint		longest;
	ulint		longest_i	= 0;
	ulint		i;

	size = rec_get_converted_size(entry);

	/* The vector, its field array and all moved data live in one
	heap, which is owned by the vector */
	heap = mem_heap_create(size + dtuple_get_n_fields(entry)
					* sizeof(big_rec_field_t) + 1000);

	vector = mem_heap_alloc(heap, sizeof(big_rec_t));

	vector->heap = heap;
	vector->fields = mem_heap_alloc(heap, dtuple_get_n_fields(entry)
					* sizeof(big_rec_field_t));

	/* Decide which fields to shorten: the algorithm is to look for
	the longest field which does not occur in the ordering part
	of any index on the table */

	n_fields = 0;

	while ((rec_get_converted_size(entry)
				>= page_get_free_space_of_empty() / 2)
	       || rec_get_converted_size(entry) >= REC_MAX_DATA_SIZE) {

		longest = 0;

		for (i = dict_index_get_n_unique_in_tree(index);
				i < dtuple_get_n_fields(entry); i++) {

			/* Skip over fields which are ordering in some index */

			if (dict_field_get_col(
				dict_index_get_nth_field(index, i))
							->ord_part == 0) {

				dfield = dtuple_get_nth_field(entry, i);

				if (dfield->len != UNIV_SQL_NULL &&
						dfield->len > longest) {

					longest = dfield->len;

					longest_i = i;
				}
			}
		}

		if (longest < BTR_EXTERN_FIELD_REF_SIZE + 10) {

			/* Cannot shorten more. The bytes moved so far
			exist only in the heap we are about to free and
			the fields of entry have been shortened and
			partly zeroed, so put the data back first. The
			moves are undone newest first, which is correct
			even if one field was shortened more than once */

			while (n_fields > 0) {
				n_fields--;

				dfield = dtuple_get_nth_field(entry,
					vector->fields[n_fields].field_no);

				ut_memcpy(((byte*)dfield->data)
					+ dfield->len
					- BTR_EXTERN_FIELD_REF_SIZE,
					vector->fields[n_fields].data,
					vector->fields[n_fields].len);

				dfield->len = dfield->len
					+ vector->fields[n_fields].len
					- BTR_EXTERN_FIELD_REF_SIZE;
			}

			mem_heap_free(heap);

			return(NULL);
		}

		/* Move data from field longest_i to big rec vector,
		but do not let data size of the remaining entry
		drop below 128 which is the limit for the 2-byte
		offset storage format in a physical record */

		dfield = dtuple_get_nth_field(entry, longest_i);
		vector->fields[n_fields].field_no = longest_i;

		if (dtuple_get_data_size(entry) - dfield->len
						<= REC_1BYTE_OFFS_LIMIT) {

			vector->fields[n_fields].len =
					dtuple_get_data_size(entry)
						- REC_1BYTE_OFFS_LIMIT;

			/* Since dfield will contain at least
			a 20-byte reference to the extern storage,
			we know that the data size of entry will be
			> REC_1BYTE_OFFS_LIMIT */
		} else {
			vector->fields[n_fields].len = dfield->len;
		}

		vector->fields[n_fields].data = mem_heap_alloc(heap,
						vector->fields[n_fields].len);

		/* Copy data (from the end of field) to big rec vector */

		ut_memcpy(vector->fields[n_fields].data,
				((byte*)dfield->data) + dfield->len
						- vector->fields[n_fields].len,
				vector->fields[n_fields].len);

		dfield->len = dfield->len - vector->fields[n_fields].len
						+ BTR_EXTERN_FIELD_REF_SIZE;

		/* Set the extern field reference in dfield to zero */

		memset(((byte*)dfield->data)
				+ dfield->len - BTR_EXTERN_FIELD_REF_SIZE,
					0, BTR_EXTERN_FIELD_REF_SIZE);
		n_fields++;
	}

	vector->n_fields = n_fields;

	return(vector);
}
/******************************************************************
Undoes dtuple_convert_big_rec: copies the data held in vector back to
the end of the corresponding fields of entry, over the extern field
reference, and restores the field lengths. The fields of entry can only
be guaranteed to have room for the data if vector was created from this
same entry with dtuple_convert_big_rec. */

void
dtuple_convert_back_big_rec(
/*========================*/
	dict_index_t*	index,	/* in: index */
	dtuple_t*	entry,	/* in: entry whose data was put to vector */
	big_rec_t*	vector)	/* in, own: big rec vector; it is
				freed in this function */
{
	dfield_t*	field;
	byte*		dest;
	ulint		k;

	k = 0;

	while (k < vector->n_fields) {

		field = dtuple_get_nth_field(entry,
						vector->fields[k].field_no);

		/* The moved bytes start where the extern field reference
		currently begins, i.e. at the last
		BTR_EXTERN_FIELD_REF_SIZE bytes of the shortened field */

		dest = ((byte*)field->data)
				+ field->len - BTR_EXTERN_FIELD_REF_SIZE;

		ut_memcpy(dest, vector->fields[k].data,
						vector->fields[k].len);

		/* The reference is overwritten, the moved data is back */

		field->len = field->len + vector->fields[k].len
						- BTR_EXTERN_FIELD_REF_SIZE;
		k++;
	}

	/* Releases the moved data, the field array and the vector */

	mem_heap_free(vector->heap);
}
/******************************************************************
Frees the memory in a big rec vector by freeing the memory heap stored
in vector->heap. In this file dtuple_convert_big_rec allocates the vector
struct itself, its field array and the field data from that heap, so the
vector pointer must not be used after this call. */
void
dtuple_big_rec_free(
/*================*/
big_rec_t* vector) /* in, own: big rec vector; it is
freed in this function */
{
mem_heap_free(vector->heap);
}
#ifdef notdefined
/******************************************************************

View File

@ -90,6 +90,9 @@ struct fil_node_struct {
is ignored) */
ulint n_pending;
/* count of pending i/o-ops on this file */
ibool is_modified; /* this is set to TRUE when we write
to the file and FALSE when we call fil_flush
for this file space */
UT_LIST_NODE_T(fil_node_t) chain;
/* link field for the file chain */
UT_LIST_NODE_T(fil_node_t) LRU;
@ -301,6 +304,8 @@ fil_node_create(
node->size = size;
node->magic_n = FIL_NODE_MAGIC_N;
node->n_pending = 0;
node->is_modified = FALSE;
HASH_SEARCH(hash, system->spaces, id, space, space->id == id);
@ -720,6 +725,47 @@ fil_space_get_size(
return(size);
}
/***********************************************************************
Checks if the pair (space id, page number) refers to an existing page in
a tablespace file space. The answer is FALSE if no space with the given
id exists, if the space is not a tablespace (purpose != FIL_TABLESPACE),
or if the page number lies outside the space. */

ibool
fil_check_adress_in_tablespace(
/*===========================*/
			/* out: TRUE if the address is meaningful */
	ulint	id,	/* in: space id */
	ulint	page_no)/* in: page number */
{
	fil_space_t*	space;
	fil_system_t*	system		= fil_system;
	ibool		ret;

	ut_ad(system);

	/* The space hash table and space->size are read under the
	file system mutex */
	mutex_enter(&(system->mutex));

	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);

	if (space == NULL) {
		ret = FALSE;
	} else if (page_no >= space->size) {
		/* space->size is the size of the space in pages, so the
		last existing page has number size - 1; page_no == size
		is already one past the end */
		ret = FALSE;
	} else if (space->purpose != FIL_TABLESPACE) {
		ret = FALSE;
	} else {
		ret = TRUE;
	}

	mutex_exit(&(system->mutex));

	return(ret);
}
/***********************************************************************
Tries to reserve free extents in a file space. */
@ -812,8 +858,14 @@ fil_node_prepare_for_io(
fil_node_close(last_node, system);
}
node->handle = os_file_create(node->name, OS_FILE_OPEN,
OS_FILE_AIO, &ret);
if (space->purpose == FIL_LOG) {
node->handle = os_file_create(node->name, OS_FILE_OPEN,
OS_FILE_AIO, OS_LOG_FILE, &ret);
} else {
node->handle = os_file_create(node->name, OS_FILE_OPEN,
OS_FILE_AIO, OS_DATA_FILE, &ret);
}
ut_a(ret);
node->open = TRUE;
@ -851,7 +903,8 @@ void
fil_node_complete_io(
/*=================*/
fil_node_t* node, /* in: file node */
fil_system_t* system) /* in: file system */
fil_system_t* system, /* in: file system */
ulint type) /* in: OS_FILE_WRITE or ..._READ */
{
ut_ad(node);
ut_ad(system);
@ -860,6 +913,10 @@ fil_node_complete_io(
node->n_pending--;
if (type != OS_FILE_READ) {
node->is_modified = TRUE;
}
if (node->n_pending == 0) {
/* The node must be put back to the LRU list */
UT_LIST_ADD_FIRST(LRU, system->LRU, node);
@ -1016,7 +1073,7 @@ loop:
mutex_enter(&(system->mutex));
fil_node_complete_io(node, system);
fil_node_complete_io(node, system, type);
mutex_exit(&(system->mutex));
@ -1090,12 +1147,14 @@ fil_aio_wait(
fil_node_t* fil_node;
fil_system_t* system = fil_system;
void* message;
ulint type;
ut_ad(fil_validate());
if (os_aio_use_native_aio) {
#ifdef WIN_ASYNC_IO
ret = os_aio_windows_handle(segment, 0, &fil_node, &message);
ret = os_aio_windows_handle(segment, 0, &fil_node, &message,
&type);
#elif defined(POSIX_ASYNC_IO)
ret = os_aio_posix_handle(segment, &fil_node, &message);
#else
@ -1103,14 +1162,14 @@ fil_aio_wait(
#endif
} else {
ret = os_aio_simulated_handle(segment, (void**) &fil_node,
&message);
&message, &type);
}
ut_a(ret);
mutex_enter(&(system->mutex));
fil_node_complete_io(fil_node, fil_system);
fil_node_complete_io(fil_node, fil_system, type);
mutex_exit(&(system->mutex));
@ -1149,8 +1208,10 @@ fil_flush(
node = UT_LIST_GET_FIRST(space->chain);
while (node) {
if (node->open) {
if (node->open && node->is_modified) {
file = node->handle;
node->is_modified = FALSE;
mutex_exit(&(system->mutex));
@ -1159,9 +1220,11 @@ fil_flush(
handle is still open: we assume that the OS
will not crash or trap even if we pass a handle
to a closed file below in os_file_flush! */
/* printf("Flushing to file %s\n", node->name); */
os_file_flush(file);
mutex_enter(&(system->mutex));
}

View File

@ -3239,8 +3239,8 @@ fsp_validate(
ut_a(descr_count * FSP_EXTENT_SIZE == free_limit);
ut_a(n_used + n_full_frag_pages
== n_used2 + (free_limit + XDES_DESCRIBED_PER_PAGE - 1)
/ XDES_DESCRIBED_PER_PAGE
== n_used2 + 2* ((free_limit + XDES_DESCRIBED_PER_PAGE - 1)
/ XDES_DESCRIBED_PER_PAGE)
+ seg_inode_len_full + seg_inode_len_free);
ut_a(frag_n_used == n_used);

View File

@ -1698,8 +1698,7 @@ loop:
btr_pcur_open_at_rnd_pos(data->index, BTR_SEARCH_LEAF, &pcur, &mtr);
if (data->size == 1
&& 0 == page_get_n_recs(btr_pcur_get_page(&pcur))) {
if (0 == page_get_n_recs(btr_pcur_get_page(&pcur))) {
/* This tree is empty */
@ -1946,6 +1945,7 @@ ibuf_insert_low(
ulint page_no,/* in: page number where to insert */
que_thr_t* thr) /* in: query thread */
{
big_rec_t* dummy_big_rec;
ulint entry_size;
btr_pcur_t pcur;
btr_cur_t* cursor;
@ -2101,7 +2101,8 @@ ibuf_insert_low(
if (mode == BTR_MODIFY_PREV) {
err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor,
ibuf_entry, &ins_rec, thr,
ibuf_entry, &ins_rec,
&dummy_big_rec, thr,
&mtr);
if (err == DB_SUCCESS) {
/* Update the page max trx id field */
@ -2121,7 +2122,8 @@ ibuf_insert_low(
err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
| BTR_NO_UNDO_LOG_FLAG,
cursor,
ibuf_entry, &ins_rec, thr,
ibuf_entry, &ins_rec,
&dummy_big_rec, thr,
&mtr);
if (err == DB_SUCCESS) {
/* Update the page max trx id field */

View File

@ -357,6 +357,44 @@ btr_get_size(
/* out: number of pages */
dict_index_t* index, /* in: index */
ulint flag); /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */
/******************************************************************
Allocates a new file page to be used in an index tree. NOTE: we assume
that the caller has made the reservation for free extents! */
page_t*
btr_page_alloc(
/*===========*/
/* out: new allocated page, x-latched;
NULL if out of space */
dict_tree_t* tree, /* in: index tree */
ulint hint_page_no, /* in: hint of a good page */
byte file_direction, /* in: direction where a possible
page split is made */
ulint level, /* in: level where the page is placed
in the tree */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Frees a file page used in an index tree. NOTE: cannot free field external
storage pages because the page must contain info on its level. */
void
btr_page_free(
/*==========*/
dict_tree_t* tree, /* in: index tree */
page_t* page, /* in: page to be freed, x-latched */
mtr_t* mtr); /* in: mtr */
/******************************************************************
Frees a file page used in an index tree. Can also be used for BLOB
external storage pages, because the page level is not read from the page
but given explicitly as an argument (0 can be passed for such pages). */
void
btr_page_free_low(
/*==============*/
dict_tree_t* tree, /* in: index tree */
page_t* page, /* in: page to be freed, x-latched */
ulint level, /* in: page level */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Prints size info of a B-tree. */

View File

@ -151,11 +151,14 @@ btr_cur_optimistic_insert(
ulint flags, /* in: undo logging and locking flags: if not
zero, the parameters index and thr should be
specified */
btr_cur_t* cursor, /* in: cursor on page after which
to insert; cursor stays valid */
btr_cur_t* cursor, /* in: cursor on page after which to insert;
cursor stays valid */
dtuple_t* entry, /* in: entry to insert */
rec_t** rec, /* out: pointer to inserted record if
succeed */
big_rec_t** big_rec,/* out: big rec vector whose fields have to
be stored externally by the caller, or
NULL */
que_thr_t* thr, /* in: query thread or NULL */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
@ -169,13 +172,19 @@ btr_cur_pessimistic_insert(
/*=======================*/
/* out: DB_SUCCESS or error number */
ulint flags, /* in: undo logging and locking flags: if not
zero, the parameters index and thr should be
specified */
zero, the parameter thr should be
specified; if no undo logging is specified,
then the caller must have reserved enough
free extents in the file space so that the
insertion will certainly succeed */
btr_cur_t* cursor, /* in: cursor after which to insert;
cursor does not stay valid */
cursor stays valid */
dtuple_t* entry, /* in: entry to insert */
rec_t** rec, /* out: pointer to inserted record if
succeed */
big_rec_t** big_rec,/* out: big rec vector whose fields have to
be stored externally by the caller, or
NULL */
que_thr_t* thr, /* in: query thread or NULL */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
@ -228,8 +237,9 @@ btr_cur_pessimistic_update(
/* out: DB_SUCCESS or error code */
ulint flags, /* in: undo logging, locking, and rollback
flags */
btr_cur_t* cursor, /* in: cursor on the record to update;
cursor does not stay valid */
btr_cur_t* cursor, /* in: cursor on the record to update */
big_rec_t** big_rec,/* out: big rec vector whose fields have to
be stored externally by the caller, or NULL */
upd_t* update, /* in: update vector; this is allowed also
contain trx id and roll ptr fields, but
the values in update vector have no effect */
@ -407,6 +417,92 @@ btr_estimate_number_of_different_key_vals(
/*======================================*/
/* out: estimated number of key values */
dict_index_t* index); /* in: index */
/***********************************************************************
Stores the fields in big_rec_vec to the tablespace and puts pointers to
them in rec. The fields are stored on pages allocated from leaf node
file segment of the index tree. */
ulint
btr_store_big_rec_extern_fields(
/*============================*/
/* out: DB_SUCCESS or error */
dict_index_t* index, /* in: index of rec; the index tree
MUST be X-latched */
rec_t* rec, /* in: record */
big_rec_t* big_rec_vec, /* in: vector containing fields
to be stored externally */
mtr_t* local_mtr); /* in: mtr containing the latch to
rec and to the tree */
/***********************************************************************
Frees the space in an externally stored field to the file space
management. */
void
btr_free_externally_stored_field(
/*=============================*/
dict_index_t* index, /* in: index of the data, the index
tree MUST be X-latched */
byte* data, /* in: internally stored data
+ reference to the externally
stored part */
ulint local_len, /* in: length of data */
mtr_t* local_mtr); /* in: mtr containing the latch to
data and an X-latch to the index
tree */
/***************************************************************
Frees the externally stored fields for a record. */
void
btr_rec_free_externally_stored_fields(
/*==================================*/
dict_index_t* index, /* in: index of the data, the index
tree MUST be X-latched */
rec_t* rec, /* in: record */
mtr_t* mtr); /* in: mini-transaction handle which contains
an X-latch to record page and to the index
tree */
/***********************************************************************
Copies an externally stored field of a record to mem heap. */
byte*
btr_rec_copy_externally_stored_field(
/*=================================*/
/* out: the field copied to heap */
rec_t* rec, /* in: record */
ulint no, /* in: field number */
ulint* len, /* out: length of the field */
mem_heap_t* heap); /* in: mem heap */
/***********************************************************************
Copies an externally stored field of a record to mem heap. Parameter
data contains a pointer to 'internally' stored part of the field:
possibly some data, and the reference to the externally stored part in
the last 20 bytes of data. */
byte*
btr_copy_externally_stored_field(
/*=============================*/
/* out: the whole field copied to heap */
ulint* len, /* out: length of the whole field */
byte* data, /* in: 'internally' stored part of the
field containing also the reference to
the external part */
ulint local_len,/* in: length of data */
mem_heap_t* heap); /* in: mem heap */
/***********************************************************************
Stores the positions of the fields marked as extern storage in the update
vector, and also those fields which are marked as extern storage in rec
and not mentioned in the updated fields. We use this function to remember
which fields we must mark as extern storage in a record inserted for an
update. */
ulint
btr_push_update_extern_fields(
/*==========================*/
/* out: number of values stored in ext_vect */
ulint* ext_vect, /* in: array of ulints, must be preallocated
to have room for all fields in rec */
rec_t* rec, /* in: record */
upd_t* update); /* in: update vector */
/*######################################################################*/
@ -516,6 +612,19 @@ and sleep this many microseconds in between */
#define BTR_CUR_RETRY_DELETE_N_TIMES 100
#define BTR_CUR_RETRY_SLEEP_TIME 50000
/* The reference in a field of which data is stored on a different page */
/*--------------------------------------*/
#define BTR_EXTERN_SPACE_ID 0 /* space id where stored */
#define BTR_EXTERN_PAGE_NO 4 /* page no where stored */
#define BTR_EXTERN_OFFSET 8 /* offset of BLOB header
on that page */
#define BTR_EXTERN_LEN 12 /* 8 bytes containing the
length of the externally
stored part of the BLOB */
/*--------------------------------------*/
#define BTR_EXTERN_FIELD_REF_SIZE 20
extern ulint btr_cur_n_non_sea;
#ifndef UNIV_NONINL

View File

@ -378,6 +378,14 @@ buf_calc_page_checksum(
/*===================*/
/* out: checksum */
byte* page); /* in: buffer page */
/************************************************************************
Checks if a page is corrupt. */
ibool
buf_page_is_corrupted(
/*==================*/
/* out: TRUE if corrupted */
byte* read_buf); /* in: a database page */
/**************************************************************************
Gets the page number of a pointer pointing within a buffer frame containing
a file page. */

View File

@ -101,7 +101,7 @@ make sure that a read-ahead batch can be read efficiently in a single
sweep). */
#define BUF_FLUSH_FREE_BLOCK_MARGIN (5 + BUF_READ_AHEAD_AREA)
#define BUF_FLUSH_EXTRA_MARGIN (BUF_FLUSH_FREE_BLOCK_MARGIN / 4)
#define BUF_FLUSH_EXTRA_MARGIN (BUF_FLUSH_FREE_BLOCK_MARGIN / 4 + 100)
#ifndef UNIV_NONINL
#include "buf0flu.ic"

View File

@ -14,6 +14,9 @@ Created 5/30/1994 Heikki Tuuri
#include "data0types.h"
#include "data0type.h"
#include "mem0mem.h"
#include "dict0types.h"
typedef struct big_rec_struct big_rec_t;
/* Some non-inlined functions used in the MySQL interface: */
void
@ -312,6 +315,41 @@ dtuple_sprintf(
char* buf, /* in: print buffer */
ulint buf_len,/* in: buf length in bytes */
dtuple_t* tuple); /* in: tuple */
/******************************************************************
Moves parts of long fields in entry to the big record vector so that
the size of tuple drops below the maximum record size allowed in the
database. Moves data only from those fields which are not necessary
to determine uniquely the insertion place of the tuple in the index. */
big_rec_t*
dtuple_convert_big_rec(
/*===================*/
/* out, own: created big record vector,
NULL if we are not able to shorten
the entry enough, i.e., if there are
too many short fields in entry */
dict_index_t* index, /* in: index */
dtuple_t* entry); /* in: index entry */
/******************************************************************
Puts back to entry the data stored in vector. Note that to ensure the
fields in entry can accommodate the data, vector must have been created
from entry with dtuple_convert_big_rec. */
void
dtuple_convert_back_big_rec(
/*========================*/
dict_index_t* index, /* in: index */
dtuple_t* entry, /* in: entry whose data was put to vector */
big_rec_t* vector);/* in, own: big rec vector; it is
freed in this function */
/******************************************************************
Frees the memory in a big rec vector. */
void
dtuple_big_rec_free(
/*================*/
big_rec_t* vector); /* in, own: big rec vector; it is
freed in this function */
/***************************************************************
Generates a random tuple. */
@ -396,7 +434,7 @@ dtuple_gen_search_tuple_TPC_C(
/* Structure for an SQL data field */
struct dfield_struct{
void* data; /* pointer to data */
ulint len; /* data length; UNIV_SQL_NULL if SQL null */
ulint len; /* data length; UNIV_SQL_NULL if SQL null; */
dtype_t type; /* type of data */
ulint col_no; /* when building index entries, the column
number can be stored here */
@ -423,6 +461,24 @@ struct dtuple_struct {
};
#define DATA_TUPLE_MAGIC_N 65478679
/* A slot for a field in a big rec vector; big_rec_struct holds a pointer
to slots of this type in its fields member */
typedef struct big_rec_field_struct big_rec_field_t;
struct big_rec_field_struct {
ulint field_no; /* field number in record */
ulint len; /* length of the stored data */
byte* data; /* pointer to the stored data */
};
/* Storage format for overflow data in a big record, that is, a record
which needs external storage of data fields */
struct big_rec_struct {
mem_heap_t* heap; /* memory heap from which allocated */
ulint n_fields; /* number of stored fields, i.e., of slots
in fields */
big_rec_field_t* fields; /* stored fields, one slot per field */
};
#ifndef UNIV_NONINL
#include "data0data.ic"
#endif

View File

@ -307,12 +307,13 @@ dtuple_create(
/**************************************************************
The following function returns the sum of data lengths of a tuple. The space
occupied by the field structs or the tuple struct is not counted. */
occupied by the field structs or the tuple struct is not counted. Neither
is possible space in externally stored parts of the field. */
UNIV_INLINE
ulint
dtuple_get_data_size(
/*=================*/
/* out: sum of data lens */
/* out: sum of data lengths */
dtuple_t* tuple) /* in: typed data tuple */
{
dfield_t* field;
@ -382,7 +383,7 @@ dtuple_datas_are_equal(
field2 = dtuple_get_nth_field(tuple2, i);
data2 = (byte*) dfield_get_data(field2);
len2 = dfield_get_len(field2);
len2 = dfield_get_len(field2);
if (len1 != len2) {

View File

@ -651,8 +651,6 @@ dict_table_get_index(
char* name) /* in: index name */
{
dict_index_t* index = NULL;
mutex_enter(&(dict_sys->mutex));
index = dict_table_get_first_index(table);
@ -665,8 +663,6 @@ dict_table_get_index(
index = dict_table_get_next_index(index);
}
mutex_exit(&(dict_sys->mutex));
return(index);
}

View File

@ -143,7 +143,7 @@ struct dict_col_struct{
ulint clust_pos;/* position of the column in the
clustered index */
ulint ord_part;/* count of how many times this column
appears in an ordering fields of an index */
appears in ordering fields of an index */
char* name; /* name */
dtype_t type; /* data type */
dict_table_t* table; /* back pointer to table of this column */

View File

@ -196,6 +196,16 @@ fil_space_get_size(
/* out: space size */
ulint id); /* in: space id */
/***********************************************************************
Checks if the pair space, page_no refers to an existing page in a
tablespace file space. */
ibool
fil_check_adress_in_tablespace(
/*===========================*/
/* out: TRUE if the address is meaningful */
ulint id, /* in: space id */
ulint page_no);/* in: page number */
/***********************************************************************
Appends a new file to the chain of files of a space.
File must be closed. */

View File

@ -70,7 +70,7 @@ page_t*
fseg_create(
/*========*/
/* out: the page where the segment header is placed,
x-latched, FIL_NULL if could not create segment
x-latched, NULL if could not create segment
because of lack of space */
ulint space, /* in: space id */
ulint page, /* in: page where the segment header is placed: if

View File

@ -115,7 +115,7 @@ mach_write_to_4(
{
ut_ad(b);
#if notdefined && !defined(__STDC__) && defined(UNIV_INTEL) && (UNIV_WORD_SIZE == 4) && defined(UNIV_VISUALC)
#if (0 == 1) && !defined(__STDC__) && defined(UNIV_INTEL) && (UNIV_WORD_SIZE == 4) && defined(UNIV_VISUALC)
/* We do not use this even on Intel, because unaligned accesses may
be slow */
@ -143,7 +143,7 @@ mach_read_from_4(
/* out: ulint integer */
byte* b) /* in: pointer to four bytes */
{
#if notdefined && !defined(__STDC__) && defined(UNIV_INTEL) && (UNIV_WORD_SIZE == 4) && defined(UNIV_VISUALC)
#if (0 == 1) && !defined(__STDC__) && defined(UNIV_INTEL) && (UNIV_WORD_SIZE == 4) && defined(UNIV_VISUALC)
/* We do not use this even on Intel, because unaligned accesses may
be slow */

View File

@ -59,6 +59,10 @@ log. */
#define OS_FILE_AIO 61
#define OS_FILE_NORMAL 62
/* Types for file create */
#define OS_DATA_FILE 100
#define OS_LOG_FILE 101
/* Error codes from os_file_get_last_error */
#define OS_FILE_NOT_FOUND 71
#define OS_FILE_DISK_FULL 72
@ -125,6 +129,7 @@ os_file_create(
if a new file is created or an old overwritten */
ulint purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o
is desired, OS_FILE_NORMAL, if any normal file */
ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */
ibool* success);/* out: TRUE if succeed, FALSE if error */
/***************************************************************************
Closes a file handle. In case of error, error number can be retrieved with
@ -263,6 +268,13 @@ os_aio(
operation); if mode is OS_AIO_SYNC, these
are ignored */
void* message2);
/****************************************************************************
Waits until there are no pending writes in os_aio_write_array. There can
be other, synchronous, pending writes. */
void
os_aio_wait_until_no_pending_writes(void);
/*=====================================*/
/**************************************************************************
Wakes up simulated aio i/o-handler threads if they have something to do. */
@ -298,7 +310,8 @@ os_aio_windows_handle(
the aio operation failed, these output
parameters are valid and can be used to
restart the operation, for example */
void** message2);
void** message2,
ulint* type); /* out: OS_FILE_WRITE or ..._READ */
#endif
#ifdef POSIX_ASYNC_IO
/**************************************************************************
@ -335,7 +348,8 @@ os_aio_simulated_handle(
the aio operation failed, these output
parameters are valid and can be used to
restart the operation, for example */
void** message2);
void** message2,
ulint* type); /* out: OS_FILE_WRITE or ..._READ */
/**************************************************************************
Validates the consistency of the aio system. */

View File

@ -1,7 +1,7 @@
/***********************************************************************
Comparison services for records
(c) 1994-1996 Innobase Oy
(c) 1994-2001 Innobase Oy
Created 7/1/1994 Heikki Tuuri
************************************************************************/
@ -31,14 +31,18 @@ This function is used to compare a data tuple to a physical record.
Only dtuple->n_fields_cmp first fields are taken into account for
the data tuple! If we denote by n = n_fields_cmp, then rec must
have either m >= n fields, or it must differ from dtuple in some of
the m fields rec has. */
the m fields rec has. If rec has an externally stored field we do not
compare it but return with value 0 if such a comparison should be
made. */
int
cmp_dtuple_rec_with_match(
/*======================*/
/* out: 1, 0, -1, if dtuple is greater, equal,
less than rec, respectively, when only the
common first fields are compared */
common first fields are compared, or
until the first externally stored field in
rec */
dtuple_t* dtuple, /* in: data tuple */
rec_t* rec, /* in: physical record which differs from
dtuple in some of the common fields, or which
@ -89,7 +93,8 @@ cmp_dtuple_rec_prefix_equal(
fields in dtuple */
/*****************************************************************
This function is used to compare two physical records. Only the common
first fields are compared. */
first fields are compared, and if an externally stored field is
encountered, then 0 is returned. */
int
cmp_rec_rec_with_match(

View File

@ -12,6 +12,7 @@ Created 5/30/1994 Heikki Tuuri
#include "univ.i"
#include "data0data.h"
#include "rem0types.h"
#include "mtr0types.h"
/* Maximum values for various fields (for non-blob tuples) */
#define REC_MAX_N_FIELDS (1024 - 1)
@ -162,6 +163,49 @@ rec_get_nth_field_size(
/* out: field size in bytes */
rec_t* rec, /* in: record */
ulint n); /* in: index of the field */
/***************************************************************
Gets the value of the ith field extern storage bit. If it is TRUE
it means that the field is stored on another page. */
UNIV_INLINE
ibool
rec_get_nth_field_extern_bit(
/*=========================*/
/* out: TRUE if the extern storage bit is set,
FALSE otherwise */
rec_t* rec, /* in: record */
ulint i); /* in: ith field */
/**********************************************************
Returns TRUE if the extern bit is set in any of the fields
of rec. */
UNIV_INLINE
ibool
rec_contains_externally_stored_field(
/*=================================*/
/* out: TRUE if a field is stored externally */
rec_t* rec); /* in: record */
/***************************************************************
Sets the value of the ith field extern storage bit. */
void
rec_set_nth_field_extern_bit(
/*=========================*/
rec_t* rec, /* in: record */
ulint i, /* in: ith field */
ibool val, /* in: value to set */
mtr_t* mtr); /* in: mtr holding an X-latch to the page where
rec is, or NULL; in the NULL case we do not
write to log about the change */
/***************************************************************
Sets TRUE the extern storage bits of fields mentioned in an array. */
void
rec_set_field_extern_bits(
/*======================*/
rec_t* rec, /* in: record */
ulint* vec, /* in: array of field numbers */
ulint n_fields, /* in: number of field numbers in vec */
mtr_t* mtr); /* in: mtr holding an X-latch to the page
where rec is, or NULL; in the NULL case we
do not write to log about the change */
/****************************************************************
The following function is used to get a copy of the nth
data field in the record to a buffer. */
@ -350,6 +394,15 @@ rec_sprintf(
#define REC_INFO_BITS 6 /* This is single byte bit-field */
/* Maximum lengths for the data in a physical record if the offsets
are given in one byte (resp. two byte) format. */
#define REC_1BYTE_OFFS_LIMIT 0x7F
#define REC_2BYTE_OFFS_LIMIT 0x7FFF
/* The data size of record must be smaller than this because we reserve
two upmost bits in a two byte offset for special purposes */
#define REC_MAX_DATA_SIZE (16 * 1024)
#ifndef UNIV_NONINL
#include "rem0rec.ic"
#endif

View File

@ -25,12 +25,6 @@ significant bytes and bits are written below less significant.
4 bits info bits
*/
/* Maximum lengths for the data in a physical record if the offsets
are given as one byte (resp. two byte) format. */
#define REC_1BYTE_OFFS_LIMIT 0x7F
#define REC_2BYTE_OFFS_LIMIT 0x7FFF
/* We list the byte offsets from the origin of the record, the mask,
and the shift needed to obtain each bit-field of the record. */
@ -66,6 +60,11 @@ one-byte and two-byte offsets */
#define REC_1BYTE_SQL_NULL_MASK 0x80
#define REC_2BYTE_SQL_NULL_MASK 0x8000
/* In a 2-byte offset the second most significant bit denotes
a field stored to another page: */
#define REC_2BYTE_EXTERN_MASK 0x4000
/***************************************************************
Sets the value of the ith field SQL null bit. */
@ -489,7 +488,7 @@ ulint
rec_2_get_field_end_info(
/*=====================*/
/* out: offset of the start of the field, SQL null
flag ORed */
flag and extern storage flag ORed */
rec_t* rec, /* in: record */
ulint n) /* in: field index */
{
@ -499,6 +498,63 @@ rec_2_get_field_end_info(
return(mach_read_from_2(rec - (REC_N_EXTRA_BYTES + 2 * n + 2)));
}
/***************************************************************
Tells whether the extern storage bit of the ith field is set, which
means that the field is stored on another page. A record in the 1-byte
offsets format never has the bit set. */
UNIV_INLINE
ibool
rec_get_nth_field_extern_bit(
/*=========================*/
			/* out: TRUE or FALSE */
	rec_t*	rec,	/* in: record */
	ulint	i)	/* in: ith field */
{
	ulint	end_info;

	if (rec_get_1byte_offs_flag(rec)) {
		/* The flag exists only in the 2-byte offsets format */

		return(FALSE);
	}

	end_info = rec_2_get_field_end_info(rec, i);

	return((end_info & REC_2BYTE_EXTERN_MASK) ? TRUE : FALSE);
}
/**********************************************************
Checks whether at least one field of rec has its extern storage
bit set. */
UNIV_INLINE
ibool
rec_contains_externally_stored_field(
/*=================================*/
			/* out: TRUE if a field is stored externally */
	rec_t*	rec)	/* in: record */
{
	ulint	n_fields;
	ulint	field_no;

	if (rec_get_1byte_offs_flag(rec)) {
		/* No field can be flagged in the 1-byte offsets format */

		return(FALSE);
	}

	n_fields = rec_get_n_fields(rec);
	field_no = 0;

	while (field_no < n_fields) {
		if (rec_get_nth_field_extern_bit(rec, field_no)) {

			return(TRUE);
		}

		field_no++;
	}

	return(FALSE);
}
/**********************************************************
Returns the offset of n - 1th field end if the record is stored in the 1-byte
offsets form. If the field is SQL null, the flag is ORed in the returned
@ -616,7 +672,7 @@ rec_2_get_field_start_offs(
}
return(rec_2_get_prev_field_end_info(rec, n)
& ~REC_2BYTE_SQL_NULL_MASK);
& ~(REC_2BYTE_SQL_NULL_MASK | REC_2BYTE_EXTERN_MASK));
}
/**********************************************************

View File

@ -56,6 +56,9 @@ row_ins_index_entry_low(
pessimistic descent down the index tree */
dict_index_t* index, /* in: index */
dtuple_t* entry, /* in: index entry to insert */
ulint* ext_vec,/* in: array containing field numbers of
externally stored fields in entry, or NULL */
ulint n_ext_vec,/* in: number of fields in ext_vec */
que_thr_t* thr); /* in: query thread */
/*******************************************************************
Inserts an index entry to index. Tries first optimistic, then pessimistic
@ -70,6 +73,9 @@ row_ins_index_entry(
DB_DUPLICATE_KEY, or some other error code */
dict_index_t* index, /* in: index */
dtuple_t* entry, /* in: index entry to insert */
ulint* ext_vec,/* in: array containing field numbers of
externally stored fields in entry, or NULL */
ulint n_ext_vec,/* in: number of fields in ext_vec */
que_thr_t* thr); /* in: query thread */
/***************************************************************
Inserts a row to a table. */

View File

@ -189,7 +189,9 @@ row_update_for_mysql(
row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
handle */
/*************************************************************************
Does a table creation operation for MySQL. */
Does a table creation operation for MySQL. If the name of the created
table ends with the characters INNODB_MONITOR, then this also starts the
printing of monitor output by the master thread. */
int
row_create_table_for_mysql(
@ -209,7 +211,9 @@ row_create_index_for_mysql(
dict_index_t* index, /* in: index defintion */
trx_t* trx); /* in: transaction handle */
/*************************************************************************
Drops a table for MySQL. */
Drops a table for MySQL. If the name of the dropped table ends with the
characters INNODB_MONITOR, then this also stops the printing of monitor
output by the master thread. */
int
row_drop_table_for_mysql(

View File

@ -250,6 +250,7 @@ row_search_index_entry(
#define ROW_COPY_DATA 1
#define ROW_COPY_POINTERS 2
#define ROW_COPY_ALSO_EXTERNALS 3
/* The allowed latching order of index records is the following:
(1) a secondary index record ->

View File

@ -147,6 +147,9 @@ row_upd_build_difference(
fields, excluding roll ptr and trx id */
dict_index_t* index, /* in: clustered index */
dtuple_t* entry, /* in: entry to insert */
ulint* ext_vec,/* in: array containing field numbers of
externally stored fields in entry, or NULL */
ulint n_ext_vec,/* in: number of fields in ext_vec */
rec_t* rec, /* in: clustered index record */
mem_heap_t* heap); /* in: memory heap from which allocated */
/***************************************************************
@ -262,6 +265,9 @@ struct upd_field_struct{
constants in the symbol table of the
query graph */
dfield_t new_val; /* new value for the column */
ibool extern_storage; /* this is set to TRUE if dfield
actually contains a reference to
an externally stored field */
};
/* Update vector structure */
@ -318,6 +324,10 @@ struct upd_node_struct{
dtuple_t* row; /* NULL, or a copy (also fields copied to
heap) of the row to update; this must be reset
to NULL after a successful update */
ulint* ext_vec;/* array describing which fields are stored
externally in the clustered index record of
row */
ulint n_ext_vec;/* number of fields in ext_vec */
mem_heap_t* heap; /* memory heap used as auxiliary storage for
row; this must be emptied after a successful
update if node->row != NULL */
@ -349,7 +359,7 @@ struct upd_node_struct{
looked at and updated if an ordering
field changed */
/* Compilation info flags: these must fit within one byte */
/* Compilation info flags: these must fit within 3 bits; see trx0rec.h */
#define UPD_NODE_NO_ORD_CHANGE 1 /* no secondary index record will be
changed in the update and no ordering
field of the clustered index */

View File

@ -23,6 +23,7 @@ upd_create(
mem_heap_t* heap) /* in: heap from which memory allocated */
{
upd_t* update;
ulint i;
update = mem_heap_alloc(heap, sizeof(upd_t));
@ -30,6 +31,10 @@ upd_create(
update->n_fields = n;
update->fields = mem_heap_alloc(heap, sizeof(upd_field_t) * n);
for (i = 0; i < n; i++) {
update->fields[i].extern_storage = 0;
}
return(update);
}

View File

@ -27,6 +27,9 @@ extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes;
extern ulint* srv_data_file_is_raw_partition;
#define SRV_NEW_RAW 1
#define SRV_OLD_RAW 2
extern char** srv_log_group_home_dirs;
extern ulint srv_n_log_groups;
@ -52,10 +55,14 @@ extern ulint srv_lock_wait_timeout;
extern char* srv_unix_file_flush_method_str;
extern ulint srv_unix_file_flush_method;
extern ibool srv_use_doublewrite_buf;
extern ibool srv_set_thread_priorities;
extern int srv_query_thread_priority;
/*-------------------------------------------*/
extern ibool srv_print_innodb_monitor;
extern ulint srv_n_spin_wait_rounds;
extern ulint srv_spin_wait_delay;
extern ibool srv_priority_boost;
@ -104,26 +111,13 @@ typedef struct srv_sys_struct srv_sys_t;
/* The server system */
extern srv_sys_t* srv_sys;
/* Alternatives for file flush option in Unix; see the InnoDB manual about
/* Alternatives for file flush option in Unix; see the InnoDB manual about
what these mean */
#define SRV_UNIX_FDATASYNC 1
#define SRV_UNIX_O_DSYNC 2
#define SRV_UNIX_LITTLESYNC 3
#define SRV_UNIX_NOSYNC 4
/* Raw partition flags */
#define SRV_OLD_RAW 1
#define SRV_NEW_RAW 2
void
srv_mysql_thread_release(void);
/*==========================*/
os_event_t
srv_mysql_thread_event_get(void);
void
srv_mysql_thread_slot_free(
/*==========================*/
os_event_t event);
/*************************************************************************
Boots Innobase server. */

View File

@ -393,6 +393,7 @@ Memory pool mutex */
#define SYNC_RSEG_HEADER_NEW 591
#define SYNC_RSEG_HEADER 590
#define SYNC_TRX_UNDO_PAGE 570
#define SYNC_EXTERN_STORAGE 500
#define SYNC_FSP 400
#define SYNC_FSP_PAGE 395
/*------------------------------------- Insert buffer headers */
@ -415,6 +416,7 @@ Memory pool mutex */
the level is SYNC_MEM_HASH. */
#define SYNC_BUF_POOL 150
#define SYNC_BUF_BLOCK 149
#define SYNC_DOUBLEWRITE 140
#define SYNC_ANY_LATCH 135
#define SYNC_MEM_HASH 131
#define SYNC_MEM_POOL 130

View File

@ -45,6 +45,14 @@ trx_undo_rec_get_cmpl_info(
/* out: compiler info */
trx_undo_rec_t* undo_rec); /* in: undo log record */
/**************************************************************************
Returns TRUE if an undo log record contains an extern storage field. */
UNIV_INLINE
ibool
trx_undo_rec_get_extern_storage(
/*============================*/
/* out: TRUE if extern */
trx_undo_rec_t* undo_rec); /* in: undo log record */
/**************************************************************************
Reads the undo log record number. */
UNIV_INLINE
dulint
@ -65,6 +73,8 @@ trx_undo_rec_get_pars(
TRX_UNDO_INSERT_REC, ... */
ulint* cmpl_info, /* out: compiler info, relevant only
for update type records */
ibool* updated_extern, /* out: TRUE if we updated an
externally stored field */
dulint* undo_no, /* out: undo log record number */
dulint* table_id); /* out: table id */
/***********************************************************************
@ -272,7 +282,11 @@ record */
do not change */
#define TRX_UNDO_CMPL_INFO_MULT 16 /* compilation info is multiplied by
this and ORed to the type above */
#define TRX_UNDO_UPD_EXTERN 128 /* This bit can be ORed to type_cmpl
to denote that we updated external
storage fields: used by purge to
free the external storage */
/* Operation type flags used in trx_undo_report_row_operation */
#define TRX_UNDO_INSERT_OP 1
#define TRX_UNDO_MODIFY_OP 2

View File

@ -30,6 +30,23 @@ trx_undo_rec_get_cmpl_info(
return(mach_read_from_1(undo_rec + 2) / TRX_UNDO_CMPL_INFO_MULT);
}
/**************************************************************************
Tells whether the TRX_UNDO_UPD_EXTERN bit is set in the byte stored at
offset 2 of an undo log record. */
UNIV_INLINE
ibool
trx_undo_rec_get_extern_storage(
/*============================*/
					/* out: TRUE if the bit is set,
					FALSE otherwise */
	trx_undo_rec_t*	undo_rec)	/* in: undo log record */
{
	ulint	type_cmpl;

	/* The same byte is read by trx_undo_rec_get_cmpl_info; here we
	only look at the single flag bit */
	type_cmpl = mach_read_from_1(undo_rec + 2);

	return((type_cmpl & TRX_UNDO_UPD_EXTERN) ? TRUE : FALSE);
}
/**************************************************************************
Reads the undo log record number. */
UNIV_INLINE

View File

@ -27,6 +27,23 @@ Created 3/26/1996 Heikki Tuuri
/* The transaction system */
extern trx_sys_t* trx_sys;
/* Doublewrite system */
extern trx_doublewrite_t* trx_doublewrite;
/********************************************************************
Creates the doublewrite buffer at a database start. The header of the
doublewrite buffer is placed on the trx system header page. */
void
trx_sys_create_doublewrite_buf(void);
/*================================*/
/********************************************************************
At a database startup uses a possible doublewrite buffer to restore
half-written pages in the data files. */
void
trx_sys_doublewrite_restore_corrupt_pages(void);
/*===========================================*/
/*******************************************************************
Checks if a page address is the trx sys header page. */
UNIV_INLINE
@ -235,6 +252,59 @@ therefore 256 */
segment specification slots */
/*-------------------------------------------------------------*/
/* The offset of the doublewrite buffer header on the trx system header page */
#define TRX_SYS_DOUBLEWRITE (UNIV_PAGE_SIZE - 200)
/*-------------------------------------------------------------*/
#define TRX_SYS_DOUBLEWRITE_FSEG 0 /* fseg header of the fseg
containing the doublewrite
buffer */
#define TRX_SYS_DOUBLEWRITE_MAGIC FSEG_HEADER_SIZE
/* 4-byte magic number which
shows if we already have
created the doublewrite
buffer */
#define TRX_SYS_DOUBLEWRITE_BLOCK1 (4 + FSEG_HEADER_SIZE)
/* page number of the
first page in the first
sequence of 64
(= FSP_EXTENT_SIZE) consecutive
pages in the doublewrite
buffer */
#define TRX_SYS_DOUBLEWRITE_BLOCK2 (8 + FSEG_HEADER_SIZE)
/* page number of the
first page in the second
sequence of 64 consecutive
pages in the doublewrite
buffer */
#define TRX_SYS_DOUBLEWRITE_REPEAT 12 /* we repeat the above 3
numbers so that if the trx
sys header is half-written
to disk, we still may be able
to recover the information */
/*-------------------------------------------------------------*/
#define TRX_SYS_DOUBLEWRITE_MAGIC_N 536853855
#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE
/* Doublewrite control struct: in-memory bookkeeping for the doublewrite
buffer, i.e. the page numbers of its two blocks plus the memory buffer
used when writing to it */
struct trx_doublewrite_struct{
mutex_t mutex; /* mutex protecting the first_free field and
write_buf */
ulint block1; /* the page number of the first
doublewrite block (64 pages) */
ulint block2; /* page number of the second block
(presumably of its first page, as with
TRX_SYS_DOUBLEWRITE_BLOCK2; confirm) */
ulint first_free; /* first free position in write_buf measured
in units of UNIV_PAGE_SIZE */
byte* write_buf; /* write buffer used in writing to the
doublewrite buffer, aligned to an
address divisible by UNIV_PAGE_SIZE
(which is required by Windows aio) */
byte* write_buf_unaligned; /* pointer to write_buf, but unaligned;
NOTE(review): presumably the address
originally returned by the allocator,
kept so the memory can be freed; confirm
against the code that creates the buffer */
buf_block_t**
buf_block_arr; /* array to store pointers to the buffer
blocks which have been cached to write_buf */
};
/* The transaction system central memory data structure; protected by the
kernel mutex */
struct trx_sys_struct{

View File

@ -15,6 +15,7 @@ Created 3/26/1996 Heikki Tuuri
/* Memory objects */
typedef struct trx_struct trx_t;
typedef struct trx_sys_struct trx_sys_t;
typedef struct trx_doublewrite_struct trx_doublewrite_t;
typedef struct trx_sig_struct trx_sig_t;
typedef struct trx_rseg_struct trx_rseg_t;
typedef struct trx_undo_struct trx_undo_t;

View File

@ -341,7 +341,9 @@ struct trx_undo_struct{
have delete marked records, because of
a delete of a row or an update of an
indexed field; purge is then
necessary. */
necessary; also TRUE if the transaction
has updated an externally stored
field */
dulint trx_id; /* id of the trx assigned to the undo
log */
ibool dict_operation; /* TRUE if a dict operation trx */

View File

@ -9,11 +9,12 @@ Created 1/20/1994 Heikki Tuuri
#ifndef univ_i
#define univ_i
#undef UNIV_INTEL_X86
#if (defined(_WIN32) || defined(_WIN64)) && !defined(MYSQL_SERVER)
#if (defined(_WIN32) || defined(_WIN64))
#define __WIN__
#ifndef MYSQL_SERVER
#include <windows.h>
#endif
/* If you want to check for errors with compiler level -W4,
comment out the above include of windows.h and let the following defines
@ -40,10 +41,8 @@ subdirectory of 'mysql'. */
#include <global.h>
#include <my_pthread.h>
#ifndef __WIN__
/* Include <sys/stat.h> to get S_I... macros defined for os0file.c */
#include <sys/stat.h>
#endif
#undef PACKAGE
#undef VERSION
@ -63,19 +62,21 @@ subdirectory of 'mysql'. */
/* DEBUG VERSION CONTROL
===================== */
/*
#define UNIV_SYNC_DEBUG
*/
/* Make a non-inline debug version */
/*
#define UNIV_DEBUG
#define UNIV_MEM_DEBUG
#define UNIV_SYNC_DEBUG
#define UNIV_SEARCH_DEBUG
#define UNIV_IBUF_DEBUG
#define UNIV_SYNC_PERF_STAT
#define UNIV_SEARCH_PERF_STAT
#define UNIV_DEBUG_FILE_ACCESSES
*/
#define UNIV_LIGHT_MEM_DEBUG
@ -192,6 +193,13 @@ headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */
has the SQL NULL as its value. */
#define UNIV_SQL_NULL ULINT_UNDEFINED
/* Lengths which are not UNIV_SQL_NULL, but bigger than the following
number indicate that a field contains a reference to an externally
stored part of the field in the tablespace. The length field then
contains the sum of the following flag and the locally stored len. */
#define UNIV_EXTERN_STORAGE_FIELD (UNIV_SQL_NULL - UNIV_PAGE_SIZE)
/* The following definition of __FILE__ removes compiler warnings
associated with const char* / char* mismatches with __FILE__ */

View File

@ -41,7 +41,7 @@ extern ulint* ut_dbg_null_ptr;
}\
if (ut_dbg_stop_threads) {\
fprintf(stderr,\
"Innobase: Thread %lu stopped in file %s line %lu\n",\
"InnoDB: Thread %lu stopped in file %s line %lu\n",\
os_thread_get_curr_id(), IB__FILE__, (ulint)__LINE__);\
os_thread_sleep(1000000000);\
}\
@ -50,19 +50,17 @@ extern ulint* ut_dbg_null_ptr;
#define ut_error {\
ulint dbg_i;\
fprintf(stderr,\
"Innobase: Assertion failure in thread %lu in file %s line %lu\n",\
"InnoDB: Assertion failure in thread %lu in file %s line %lu\n",\
os_thread_get_curr_id(), IB__FILE__, (ulint)__LINE__);\
fprintf(stderr,\
"Innobase: we intentionally generate a memory trap.\n");\
"InnoDB: We intentionally generate a memory trap.\n");\
fprintf(stderr,\
"Innobase: Send a bug report to mysql@lists.mysql.com\n");\
"InnoDB: Send a detailed bug report to mysql@lists.mysql.com\n");\
ut_dbg_stop_threads = TRUE;\
dbg_i = *(ut_dbg_null_ptr);\
printf("%lu", dbg_i);\
}
#ifdef UNIV_DEBUG
#define ut_ad(EXPR) ut_a(EXPR)
#define ut_d(EXPR) {EXPR;}

View File

@ -11,8 +11,7 @@ Created 1/20/1994 Heikki Tuuri
#include "univ.i"
#include <time.h>
#include <m_ctype.h>
#include <ctype.h>
typedef time_t ib_time_t;

View File

@ -3219,6 +3219,7 @@ lock_rec_print(
ulint space;
ulint page_no;
ulint i;
ulint count = 0;
mtr_t mtr;
ut_ad(mutex_own(&kernel_mutex));
@ -3230,7 +3231,8 @@ lock_rec_print(
printf("\nRECORD LOCKS space id %lu page no %lu n bits %lu",
space, page_no, lock_rec_get_n_bits(lock));
printf(" index %s trx id %lu %lu", (lock->index)->name,
printf(" table %s index %s trx id %lu %lu",
lock->index->table->name, lock->index->name,
(lock->trx)->id.high, (lock->trx)->id.low);
if (lock_get_mode(lock) == LOCK_S) {
@ -3281,10 +3283,18 @@ lock_rec_print(
rec_print(page_find_rec_with_heap_no(page, i));
}
count++;
printf("\n");
}
}
if (count >= 3) {
printf(
"3 LOCKS PRINTED FOR THIS TRX AND PAGE: SUPPRESSING FURTHER PRINTS\n");
goto end_prints;
}
}
end_prints:
mtr_commit(&mtr);
}
@ -3335,7 +3345,6 @@ lock_print_info(void)
lock_mutex_enter_kernel();
printf("------------------------------------\n");
printf("LOCK INFO:\n");
printf("Number of locks in the record hash table %lu\n",
lock_get_n_rec_locks());
@ -3352,7 +3361,7 @@ loop:
if (trx == NULL) {
lock_mutex_exit_kernel();
lock_validate();
/* lock_validate(); */
return;
}
@ -3360,6 +3369,19 @@ loop:
if (nth_lock == 0) {
printf("\nLOCKS FOR TRANSACTION ID %lu %lu\n", trx->id.high,
trx->id.low);
if (trx->que_state == TRX_QUE_LOCK_WAIT) {
printf(
"################# TRX IS WAITING FOR THE LOCK: ###\n");
if (lock_get_type(trx->wait_lock) == LOCK_REC) {
lock_rec_print(trx->wait_lock);
} else {
lock_table_print(trx->wait_lock);
}
printf(
"##################################################\n");
}
}
i = 0;
@ -3409,6 +3431,16 @@ loop:
nth_lock++;
if (nth_lock >= 25) {
printf(
"25 LOCKS PRINTED FOR THIS TRX: SUPPRESSING FURTHER PRINTS\n");
nth_trx++;
nth_lock = 0;
goto loop;
}
goto loop;
}

View File

@ -838,7 +838,9 @@ log_io_complete(
/* It was a checkpoint write */
group = (log_group_t*)((ulint)group - 1);
if (srv_unix_file_flush_method == SRV_UNIX_LITTLESYNC) {
if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
fil_flush(group->space_id);
}
@ -847,7 +849,9 @@ log_io_complete(
return;
}
if (srv_unix_file_flush_method == SRV_UNIX_LITTLESYNC) {
if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
fil_flush(group->space_id);
}
@ -1478,7 +1482,7 @@ log_checkpoint(
recv_apply_hashed_log_recs(TRUE);
}
if (srv_unix_file_flush_method == SRV_UNIX_LITTLESYNC) {
if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
fil_flush_file_spaces(FIL_TABLESPACE);
}
@ -1885,10 +1889,11 @@ loop:
fil_reserve_right_to_open();
file_handle = os_file_create(name, open_mode, OS_FILE_AIO,
&ret);
OS_DATA_FILE, &ret);
if (!ret && (open_mode == OS_FILE_CREATE)) {
file_handle = os_file_create(name, OS_FILE_OPEN,
OS_FILE_AIO, &ret);
OS_FILE_AIO, OS_DATA_FILE, &ret);
}
if (!ret) {

View File

@ -2234,7 +2234,8 @@ try_open_again:
fil_reserve_right_to_open();
file_handle = os_file_create(name, OS_FILE_OPEN, OS_FILE_AIO, &ret);
file_handle = os_file_create(name, OS_FILE_OPEN,
OS_FILE_LOG, OS_FILE_AIO, &ret);
if (ret == FALSE) {
fil_release_right_to_open();

View File

@ -10,6 +10,7 @@ Created 10/21/1995 Heikki Tuuri
#include "os0sync.h"
#include "ut0mem.h"
#include "srv0srv.h"
#include "trx0sys.h"
#undef HAVE_FDATASYNC
@ -74,9 +75,12 @@ typedef struct os_aio_array_struct os_aio_array_t;
struct os_aio_array_struct{
os_mutex_t mutex; /* the mutex protecting the aio array */
os_event_t not_full; /* The event which is set to signaled
os_event_t not_full; /* The event which is set to the signaled
state when there is space in the aio
outside the ibuf segment */
os_event_t is_empty; /* The event which is set to the signaled
state when there are no pending i/os
in this array */
ulint n_slots; /* Total number of slots in the aio array.
This must be divisible by n_threads. */
ulint n_segments;/* Number of segments in the aio array of
@ -254,6 +258,7 @@ os_file_create(
if a new is created or an old overwritten */
ulint purpose,/* in: OS_FILE_AIO, if asynchronous, non-buffered i/o
is desired, OS_FILE_NORMAL, if any normal file */
ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */
ibool* success)/* out: TRUE if succeed, FALSE if error */
{
#ifdef __WIN__
@ -347,9 +352,11 @@ try_again:
UT_NOT_USED(purpose);
#ifdef O_DSYNC
if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
create_flag = create_flag | O_DSYNC;
#ifdef O_SYNC
if ((!srv_use_doublewrite_buf || type != OS_DATA_FILE)
&& srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
create_flag = create_flag | O_SYNC;
}
#endif
if (create_mode == OS_FILE_CREATE) {
@ -548,12 +555,6 @@ os_file_flush(
#else
int ret;
#ifdef O_DSYNC
if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) {
return(TRUE);
}
#endif
#ifdef HAVE_FDATASYNC
ret = fdatasync(file);
#else
@ -634,7 +635,8 @@ os_file_pwrite(
ret = pwrite(file, buf, n, offs);
if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC
&& !trx_doublewrite) {
/* Always do fsync to reduce the probability that when
the OS crashes, a database page is only partially
@ -663,7 +665,8 @@ os_file_pwrite(
ret = write(file, buf, n);
if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC) {
&& srv_unix_file_flush_method != SRV_UNIX_NOSYNC
&& !trx_doublewrite) {
/* Always do fsync to reduce the probability that when
the OS crashes, a database page is only partially
@ -822,7 +825,9 @@ try_again:
/* Always do fsync to reduce the probability that when the OS crashes,
a database page is only partially physically written to disk. */
ut_a(TRUE == os_file_flush(file));
if (!trx_doublewrite) {
ut_a(TRUE == os_file_flush(file));
}
os_mutex_exit(os_file_seek_mutexes[i]);
@ -897,6 +902,10 @@ os_aio_array_create(
array->mutex = os_mutex_create(NULL);
array->not_full = os_event_create(NULL);
array->is_empty = os_event_create(NULL);
os_event_set(array->is_empty);
array->n_slots = n;
array->n_segments = n_segments;
array->n_reserved = 0;
@ -996,6 +1005,17 @@ os_aio_init(
#endif
}
/****************************************************************************
Waits until there are no pending writes in os_aio_write_array. There can
be other, synchronous, pending writes.
The wait is done on the is_empty event of the write array; that event is
documented in os_aio_array_struct as being in the signaled state when there
are no pending i/os in the array. */
void
os_aio_wait_until_no_pending_writes(void)
/*=====================================*/
{
/* Blocks the calling thread until is_empty is in the signaled state */
os_event_wait(os_aio_write_array->is_empty);
}
/**************************************************************************
Calculates segment number for a slot. */
static
@ -1188,6 +1208,10 @@ loop:
array->n_reserved++;
if (array->n_reserved == 1) {
os_event_reset(array->is_empty);
}
if (array->n_reserved == array->n_slots) {
os_event_reset(array->not_full);
}
@ -1261,6 +1285,10 @@ os_aio_array_free_slot(
os_event_set(array->not_full);
}
if (array->n_reserved == 0) {
os_event_set(array->is_empty);
}
#ifdef WIN_ASYNC_IO
os_event_reset(slot->control.hEvent);
#endif
@ -1374,6 +1402,7 @@ os_aio(
DWORD len = n;
void* dummy_mess1;
void* dummy_mess2;
ulint dummy_type;
#endif
ulint err = 0;
ibool retry;
@ -1486,8 +1515,9 @@ try_again:
use the same wait mechanism as for async i/o */
return(os_aio_windows_handle(ULINT_UNDEFINED,
slot->pos,
&dummy_mess1, &dummy_mess2));
slot->pos,
&dummy_mess1, &dummy_mess2,
&dummy_type));
}
return(TRUE);
@ -1544,7 +1574,8 @@ os_aio_windows_handle(
the aio operation failed, these output
parameters are valid and can be used to
restart the operation, for example */
void** message2)
void** message2,
ulint* type) /* out: OS_FILE_WRITE or ..._READ */
{
os_aio_array_t* array;
os_aio_slot_t* slot;
@ -1589,10 +1620,12 @@ os_aio_windows_handle(
*message1 = slot->message1;
*message2 = slot->message2;
*type = slot->type;
if (ret && len == slot->len) {
ret_val = TRUE;
if (slot->type == OS_FILE_WRITE) {
if (slot->type == OS_FILE_WRITE && !trx_doublewrite) {
ut_a(TRUE == os_file_flush(slot->file));
}
} else {
@ -1676,7 +1709,7 @@ os_aio_posix_handle(
*message1 = slot->message1;
*message2 = slot->message2;
if (slot->type == OS_FILE_WRITE) {
if (slot->type == OS_FILE_WRITE && !trx_doublewrite) {
ut_a(TRUE == os_file_flush(slot->file));
}
@ -1706,7 +1739,8 @@ os_aio_simulated_handle(
the aio operation failed, these output
parameters are valid and can be used to
restart the operation, for example */
void** message2)
void** message2,
ulint* type) /* out: OS_FILE_WRITE or ..._READ */
{
os_aio_array_t* array;
ulint segment;
@ -1903,6 +1937,8 @@ slot_io_done:
*message1 = slot->message1;
*message2 = slot->message2;
*type = slot->type;
os_mutex_exit(array->mutex);
os_aio_array_free_slot(array, slot);
@ -1986,13 +2022,13 @@ os_aio_print(void)
os_aio_slot_t* slot;
ulint n_reserved;
ulint i;
printf("Pending normal aio reads:\n");
array = os_aio_read_array;
loop:
ut_a(array);
printf("INFO OF AN AIO ARRAY\n");
os_mutex_enter(array->mutex);
ut_a(array->n_slots > 0);
@ -2019,24 +2055,29 @@ loop:
os_mutex_exit(array->mutex);
if (array == os_aio_read_array) {
printf("Pending aio writes:\n");
array = os_aio_write_array;
goto loop;
}
if (array == os_aio_write_array) {
printf("Pending insert buffer aio reads:\n");
array = os_aio_ibuf_array;
goto loop;
}
if (array == os_aio_ibuf_array) {
printf("Pending log writes or reads:\n");
array = os_aio_log_array;
goto loop;
}
if (array == os_aio_log_array) {
printf("Pending synchronous reads or writes:\n");
array = os_aio_sync_array;
goto loop;

View File

@ -1019,16 +1019,16 @@ page_cur_delete_rec(
page_cur_t* cursor, /* in: a page cursor */
mtr_t* mtr) /* in: mini-transaction handle */
{
page_dir_slot_t* cur_dir_slot;
page_dir_slot_t* prev_slot;
page_t* page;
rec_t* current_rec;
rec_t* prev_rec = NULL;
rec_t* next_rec;
ulint cur_slot_no;
page_dir_slot_t* cur_dir_slot;
page_dir_slot_t* prev_slot;
ulint cur_n_owned;
rec_t* rec;
ut_ad(cursor && mtr);
page = page_cur_get_page(cursor);
@ -1037,7 +1037,7 @@ page_cur_delete_rec(
/* The record must not be the supremum or infimum record. */
ut_ad(current_rec != page_get_supremum_rec(page));
ut_ad(current_rec != page_get_infimum_rec(page));
/* Save to local variables some data associated with current_rec */
cur_slot_no = page_dir_find_owner_slot(current_rec);
cur_dir_slot = page_dir_get_nth_slot(page, cur_slot_no);

View File

@ -2028,11 +2028,7 @@ pars_complete_graph_for_exec(
que_node_set_parent(node, thr);
mutex_enter(&kernel_mutex);
trx->graph = NULL;
mutex_exit(&kernel_mutex);
return(thr);
}

View File

@ -295,14 +295,18 @@ This function is used to compare a data tuple to a physical record.
Only dtuple->n_fields_cmp first fields are taken into account for
the data tuple! If we denote by n = n_fields_cmp, then rec must
have either m >= n fields, or it must differ from dtuple in some of
the m fields rec has. */
the m fields rec has. If rec has an externally stored field we do not
compare it but return with value 0 if such a comparison should be
made. */
int
cmp_dtuple_rec_with_match(
/*======================*/
/* out: 1, 0, -1, if dtuple is greater, equal,
less than rec, respectively, when only the
common first fields are compared */
common first fields are compared, or
until the first externally stored field in
rec */
dtuple_t* dtuple, /* in: data tuple */
rec_t* rec, /* in: physical record which differs from
dtuple in some of the common fields, or which
@ -344,7 +348,8 @@ cmp_dtuple_rec_with_match(
ut_ad(cur_field <= dtuple_get_n_fields_cmp(dtuple));
ut_ad(cur_field <= rec_get_n_fields(rec));
/* Match fields in a loop; stop if we run out of fields in dtuple */
/* Match fields in a loop; stop if we run out of fields in dtuple
or find an externally stored field */
while (cur_field < dtuple_get_n_fields_cmp(dtuple)) {
@ -357,7 +362,8 @@ cmp_dtuple_rec_with_match(
/* If we have matched yet 0 bytes, it may be that one or
both the fields are SQL null, or the record or dtuple may be
the predefined minimum record */
the predefined minimum record, or the field is externally
stored */
if (cur_bytes == 0) {
if (cur_field == 0) {
@ -384,6 +390,15 @@ cmp_dtuple_rec_with_match(
}
}
if (rec_get_nth_field_extern_bit(rec, cur_field)) {
/* We do not compare to an externally
stored field */
ret = 0;
goto order_resolved;
}
if (dtuple_f_len == UNIV_SQL_NULL
|| rec_f_len == UNIV_SQL_NULL) {
@ -604,7 +619,8 @@ cmp_dtuple_rec_prefix_equal(
/*****************************************************************
This function is used to compare two physical records. Only the common
first fields are compared. */
first fields are compared, and if an externally stored field is
encountered, then 0 is returned. */
int
cmp_rec_rec_with_match(
@ -688,8 +704,18 @@ cmp_rec_rec_with_match(
goto order_resolved;
}
}
}
if (rec_get_nth_field_extern_bit(rec1, cur_field)
|| rec_get_nth_field_extern_bit(rec2, cur_field)) {
/* We do not compare to an externally
stored field */
ret = 0;
goto order_resolved;
}
if (rec1_f_len == UNIV_SQL_NULL
|| rec2_f_len == UNIV_SQL_NULL) {
@ -812,7 +838,8 @@ order_resolved:
Used in debug checking of cmp_dtuple_... .
This function is used to compare a data tuple to a physical record. If
dtuple has n fields then rec must have either m >= n fields, or it must
differ from dtuple in some of the m fields rec has. */
differ from dtuple in some of the m fields rec has. If the function
encounters an externally stored field, it returns 0. */
static
int
cmp_debug_dtuple_rec_with_match(
@ -882,6 +909,14 @@ cmp_debug_dtuple_rec_with_match(
rec_f_data = rec_get_nth_field(rec, cur_field, &rec_f_len);
if (rec_get_nth_field_extern_bit(rec, cur_field)) {
/* We do not compare to an externally stored field */
ret = 0;
goto order_resolved;
}
ret = cmp_data_data(cur_type, dtuple_f_data, dtuple_f_len,
rec_f_data, rec_f_len);
if (ret != 0) {

View File

@ -1,7 +1,7 @@
/************************************************************************
Record manager
(c) 1994-1996 Innobase Oy
(c) 1994-2001 Innobase Oy
Created 5/30/1994 Heikki Tuuri
*************************************************************************/
@ -12,6 +12,9 @@ Created 5/30/1994 Heikki Tuuri
#include "rem0rec.ic"
#endif
#include "mtr0mtr.h"
#include "mtr0log.h"
/* PHYSICAL RECORD
===============
@ -21,7 +24,10 @@ found in index pages of the database, has the following format
represented on a higher text line):
| offset of the end of the last field of data, the most significant
bit is set to 1 if and only if the field is SQL-null |
bit is set to 1 if and only if the field is SQL-null,
if the offset is 2-byte, then the second most significant
bit is set to 1 if the field is stored on another page:
mostly this will occur in the case of big BLOB fields |
...
| offset of the end of the first field of data + the SQL-null bit |
| 4 bits used to delete mark a record, and mark a predefined
@ -122,7 +128,8 @@ rec_get_nth_field(
return(rec + os);
}
next_os = next_os & ~REC_2BYTE_SQL_NULL_MASK;
next_os = next_os & ~(REC_2BYTE_SQL_NULL_MASK
| REC_2BYTE_EXTERN_MASK);
}
*len = next_os - os;
@ -170,6 +177,60 @@ rec_set_nth_field_null_bit(
rec_2_set_field_end_info(rec, i, info);
}
/***************************************************************
Sets or clears the extern storage bit in the end info of the ith field
of a record. The record must use 2-byte offsets: this is asserted. */

void
rec_set_nth_field_extern_bit(
/*=========================*/
	rec_t*	rec,	/* in: record */
	ulint	i,	/* in: ith field */
	ibool	val,	/* in: value to set */
	mtr_t*	mtr)	/* in: mtr holding an X-latch to the page where
			rec is, or NULL; in the NULL case we do not
			write to log about the change */
{
	ulint	end_info;

	ut_a(!rec_get_1byte_offs_flag(rec));
	ut_a(i < rec_get_n_fields(rec));

	end_info = rec_2_get_field_end_info(rec, i);

	/* Turn the flag on or off, leaving the other bits untouched */
	end_info = val ? (end_info | REC_2BYTE_EXTERN_MASK)
		       : (end_info & ~REC_2BYTE_EXTERN_MASK);

	if (!mtr) {
		/* No mini-transaction was given: store the new end info
		directly into the record */
		rec_2_set_field_end_info(rec, i, end_info);

		return;
	}

	/* The 2-byte end info of field i lies 2 * (i + 1) bytes before the
	extra bytes of the record; write it through the mtr */
	mlog_write_ulint(rec - REC_N_EXTRA_BYTES - 2 * (i + 1), end_info,
							MLOG_2BYTES, mtr);
}
/***************************************************************
Turns on the extern storage bit of every field whose number is listed
in an array, by calling rec_set_nth_field_extern_bit for each entry. */

void
rec_set_field_extern_bits(
/*======================*/
	rec_t*	rec,		/* in: record */
	ulint*	vec,		/* in: array of field numbers */
	ulint	n_fields,	/* in: number of field numbers in vec */
	mtr_t*	mtr)		/* in: mtr holding an X-latch to the page
				where rec is, or NULL; in the NULL case we
				do not write to log about the change */
{
	ulint	pos = 0;

	/* vec is only read when n_fields > 0 */
	while (pos != n_fields) {
		rec_set_nth_field_extern_bit(rec, vec[pos], TRUE, mtr);

		pos++;
	}
}
/***************************************************************
Sets a record field to SQL null. The physical size of the field is not
changed. */

View File

@ -234,7 +234,13 @@ row_ins_clust_index_entry_by_modify(
depending on whether mtr holds just a leaf
latch or also a tree latch */
btr_cur_t* cursor, /* in: B-tree cursor */
big_rec_t** big_rec,/* out: possible big rec vector of fields
which have to be stored externally by the
caller */
dtuple_t* entry, /* in: index entry to insert */
ulint* ext_vec,/* in: array containing field numbers of
externally stored fields in entry, or NULL */
ulint n_ext_vec,/* in: number of fields in ext_vec */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr) /* in: mtr */
{
@ -243,8 +249,10 @@ row_ins_clust_index_entry_by_modify(
upd_t* update;
ulint err;
ut_ad((cursor->index)->type & DICT_CLUSTERED);
ut_ad(cursor->index->type & DICT_CLUSTERED);
*big_rec = NULL;
rec = btr_cur_get_rec(cursor);
ut_ad(rec_get_deleted_flag(rec));
@ -254,21 +262,21 @@ row_ins_clust_index_entry_by_modify(
/* Build an update vector containing all the fields to be modified;
NOTE that this vector may contain also system columns! */
update = row_upd_build_difference(cursor->index, entry, rec, heap);
update = row_upd_build_difference(cursor->index, entry, ext_vec,
n_ext_vec, rec, heap);
if (mode == BTR_MODIFY_LEAF) {
/* Try optimistic updating of the record, keeping changes
within the page */
err = btr_cur_optimistic_update(0, cursor, update, 0, thr,
mtr);
if ((err == DB_OVERFLOW) || (err == DB_UNDERFLOW)) {
err = btr_cur_optimistic_update(0, cursor, update, 0, thr, mtr);
if (err == DB_OVERFLOW || err == DB_UNDERFLOW) {
err = DB_FAIL;
}
} else {
ut_ad(mode == BTR_MODIFY_TREE);
err = btr_cur_pessimistic_update(0, cursor, update, 0, thr,
mtr);
ut_a(mode == BTR_MODIFY_TREE);
err = btr_cur_pessimistic_update(0, cursor, big_rec, update,
0, thr, mtr);
}
mem_heap_free(heap);
@ -597,14 +605,18 @@ row_ins_index_entry_low(
pessimistic descent down the index tree */
dict_index_t* index, /* in: index */
dtuple_t* entry, /* in: index entry to insert */
ulint* ext_vec,/* in: array containing field numbers of
externally stored fields in entry, or NULL */
ulint n_ext_vec,/* in: number of fields in ext_vec */
que_thr_t* thr) /* in: query thread */
{
btr_cur_t cursor;
ulint modify;
rec_t* dummy_rec;
rec_t* insert_rec;
rec_t* rec;
ulint err;
ulint n_unique;
big_rec_t* big_rec = NULL;
mtr_t mtr;
log_free_check();
@ -682,24 +694,54 @@ row_ins_index_entry_low(
if (index->type & DICT_CLUSTERED) {
err = row_ins_clust_index_entry_by_modify(mode,
&cursor, entry,
thr, &mtr);
&cursor, &big_rec,
entry,
ext_vec, n_ext_vec,
thr, &mtr);
} else {
err = row_ins_sec_index_entry_by_modify(&cursor,
thr, &mtr);
}
} else if (mode == BTR_MODIFY_LEAF) {
err = btr_cur_optimistic_insert(0, &cursor, entry,
&dummy_rec, thr, &mtr);
} else {
ut_ad(mode == BTR_MODIFY_TREE);
err = btr_cur_pessimistic_insert(0, &cursor, entry,
&dummy_rec, thr, &mtr);
if (mode == BTR_MODIFY_LEAF) {
err = btr_cur_optimistic_insert(0, &cursor, entry,
&insert_rec, &big_rec, thr, &mtr);
} else {
ut_a(mode == BTR_MODIFY_TREE);
err = btr_cur_pessimistic_insert(0, &cursor, entry,
&insert_rec, &big_rec, thr, &mtr);
}
if (err == DB_SUCCESS) {
if (ext_vec) {
rec_set_field_extern_bits(insert_rec,
ext_vec, n_ext_vec, &mtr);
}
}
}
function_exit:
mtr_commit(&mtr);
if (big_rec) {
mtr_start(&mtr);
btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
BTR_MODIFY_TREE, &cursor, 0, &mtr);
err = btr_store_big_rec_extern_fields(index,
btr_cur_get_rec(&cursor),
big_rec, &mtr);
if (modify) {
dtuple_big_rec_free(big_rec);
} else {
dtuple_convert_back_big_rec(index, entry, big_rec);
}
mtr_commit(&mtr);
}
return(err);
}
@ -716,14 +758,17 @@ row_ins_index_entry(
DB_DUPLICATE_KEY, or some other error code */
dict_index_t* index, /* in: index */
dtuple_t* entry, /* in: index entry to insert */
ulint* ext_vec,/* in: array containing field numbers of
externally stored fields in entry, or NULL */
ulint n_ext_vec,/* in: number of fields in ext_vec */
que_thr_t* thr) /* in: query thread */
{
ulint err;
/* Try first optimistic descent to the B-tree */
err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry, thr);
err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry,
ext_vec, n_ext_vec, thr);
if (err != DB_FAIL) {
return(err);
@ -731,8 +776,8 @@ row_ins_index_entry(
/* Try then pessimistic descent to the B-tree */
err = row_ins_index_entry_low(BTR_MODIFY_TREE, index, entry, thr);
err = row_ins_index_entry_low(BTR_MODIFY_TREE, index, entry,
ext_vec, n_ext_vec, thr);
return(err);
}
@ -784,7 +829,7 @@ row_ins_index_entry_step(
ut_ad(dtuple_check_typed(node->entry));
err = row_ins_index_entry(node->index, node->entry, thr);
err = row_ins_index_entry(node->index, node->entry, NULL, 0, thr);
return(err);
}

View File

@ -625,7 +625,8 @@ row_update_for_mysql(
ut_ad(prebuilt && trx);
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
UT_NOT_USED(mysql_rec);
node = prebuilt->upd_node;
clust_index = dict_table_get_first_index(table);
@ -777,7 +778,9 @@ row_get_mysql_key_number_for_index(
}
/*************************************************************************
Does a table creation operation for MySQL. */
Does a table creation operation for MySQL. If the name of the created
table ends with the characters innodb_monitor, then this also starts
printing of monitor output by the master thread. */
int
row_create_table_for_mysql(
@ -789,6 +792,8 @@ row_create_table_for_mysql(
tab_node_t* node;
mem_heap_t* heap;
que_thr_t* thr;
ulint namelen;
ulint keywordlen;
ulint err;
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
@ -833,6 +838,20 @@ row_create_table_for_mysql(
}
trx->error_state = DB_SUCCESS;
} else {
namelen = ut_strlen(table->name);
keywordlen = ut_strlen("innodb_monitor");
if (namelen >= keywordlen
&& 0 == ut_memcmp(table->name + namelen - keywordlen,
"innodb_monitor", keywordlen)) {
/* Table name ends to characters innodb_monitor:
start monitor prints */
srv_print_innodb_monitor = TRUE;
}
}
mutex_exit(&(dict_sys->mutex));
@ -900,7 +919,9 @@ row_create_index_for_mysql(
}
/*************************************************************************
Drops a table for MySQL. */
Drops a table for MySQL. If the name of the dropped table ends with the
characters innodb_monitor, then this also stops printing of monitor
output by the master thread. */
int
row_drop_table_for_mysql(
@ -918,11 +939,26 @@ row_drop_table_for_mysql(
char* str1;
char* str2;
ulint len;
ulint namelen;
ulint keywordlen;
char buf[10000];
ut_ad(trx->mysql_thread_id == os_thread_get_curr_id());
ut_a(name != NULL);
namelen = ut_strlen(name);
keywordlen = ut_strlen("innodb_monitor");
if (namelen >= keywordlen
&& 0 == ut_memcmp(name + namelen - keywordlen,
"innodb_monitor", keywordlen)) {
/* Table name ends to characters innodb_monitor:
stop monitor prints */
srv_print_innodb_monitor = FALSE;
}
/* We use the private SQL parser of Innobase to generate the
query graphs needed in deleting the dictionary data from system
tables in Innobase. Deleting a row from SYS_INDEXES table also

View File

@ -347,20 +347,36 @@ row_purge_del_mark(
}
/***************************************************************
Purges an update of an existing record. */
Purges an update of an existing record. Also purges an update of a delete
marked record if that record contained an externally stored field. */
static
void
row_purge_upd_exist(
/*================*/
row_purge_upd_exist_or_extern(
/*==========================*/
purge_node_t* node, /* in: row purge node */
que_thr_t* thr) /* in: query thread */
{
mem_heap_t* heap;
dtuple_t* entry;
dict_index_t* index;
upd_field_t* ufield;
ibool is_insert;
ulint rseg_id;
ulint page_no;
ulint offset;
ulint internal_offset;
byte* data_field;
ulint data_field_len;
ulint i;
mtr_t mtr;
ut_ad(node && thr);
if (node->rec_type == TRX_UNDO_UPD_DEL_REC) {
goto skip_secondaries;
}
heap = mem_heap_create(1024);
while (node->index != NULL) {
@ -378,6 +394,53 @@ row_purge_upd_exist(
}
mem_heap_free(heap);
skip_secondaries:
/* Free possible externally stored fields */
for (i = 0; i < upd_get_n_fields(node->update); i++) {
ufield = upd_get_nth_field(node->update, i);
if (ufield->extern_storage) {
/* We use the fact that new_val points into
node->undo_rec and thus get the offset of the
dfield data inside the undo record. Then we
can calculate from node->roll_ptr the file
address of the new_val data */
internal_offset = ((byte*)ufield->new_val.data)
- node->undo_rec;
ut_a(internal_offset < UNIV_PAGE_SIZE);
trx_undo_decode_roll_ptr(node->roll_ptr,
&is_insert, &rseg_id,
&page_no, &offset);
mtr_start(&mtr);
/* We have to acquire an X-latch to the clustered
index tree */
index = dict_table_get_first_index(node->table);
mtr_x_lock(dict_tree_get_lock(index->tree), &mtr);
/* We assume in purge of externally stored fields
that the space id of the undo log record is 0! */
data_field = buf_page_get(0, page_no, RW_X_LATCH, &mtr)
+ offset + internal_offset;
buf_page_dbg_add_level(buf_frame_align(data_field),
SYNC_TRX_UNDO_PAGE);
data_field_len = ufield->new_val.len;
btr_free_externally_stored_field(index, data_field,
data_field_len, &mtr);
mtr_commit(&mtr);
}
}
}
/***************************************************************
@ -388,6 +451,9 @@ row_purge_parse_undo_rec(
/*=====================*/
/* out: TRUE if purge operation required */
purge_node_t* node, /* in: row undo node */
ibool* updated_extern,
/* out: TRUE if an externally stored field
was updated */
que_thr_t* thr) /* in: query thread */
{
dict_index_t* clust_index;
@ -403,10 +469,10 @@ row_purge_parse_undo_rec(
ut_ad(node && thr);
ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
&undo_no, &table_id);
updated_extern, &undo_no, &table_id);
node->rec_type = type;
if (type == TRX_UNDO_UPD_DEL_REC) {
if (type == TRX_UNDO_UPD_DEL_REC && !(*updated_extern)) {
return(FALSE);
}
@ -416,7 +482,7 @@ row_purge_parse_undo_rec(
node->table = NULL;
if (type == TRX_UNDO_UPD_EXIST_REC
&& cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
&& cmpl_info & UPD_NODE_NO_ORD_CHANGE && !(*updated_extern)) {
/* Purge requires no changes to indexes: we may return */
@ -455,8 +521,11 @@ row_purge_parse_undo_rec(
/* Read to the partial row the fields that occur in indexes */
ptr = trx_undo_rec_get_partial_row(ptr, clust_index, &(node->row),
node->heap);
/* Read the partial row unless the UPD_NODE_NO_ORD_CHANGE flag is set in
cmpl_info. The flag test must be parenthesized: '!' binds tighter than
'&', so the unparenthesized form evaluates (!cmpl_info) & FLAG, which
does not test the flag bit of cmpl_info. */
if (!(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {
	ptr = trx_undo_rec_get_partial_row(ptr, clust_index,
					&(node->row), node->heap);
}
return(TRUE);
}
@ -475,6 +544,7 @@ row_purge(
{
dulint roll_ptr;
ibool purge_needed;
ibool updated_extern;
ut_ad(node && thr);
@ -494,7 +564,8 @@ row_purge(
if (node->undo_rec == &trx_purge_dummy_rec) {
purge_needed = FALSE;
} else {
purge_needed = row_purge_parse_undo_rec(node, thr);
purge_needed = row_purge_parse_undo_rec(node, &updated_extern,
thr);
}
if (purge_needed) {
@ -503,11 +574,13 @@ row_purge(
node->index = dict_table_get_next_index(
dict_table_get_first_index(node->table));
if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
row_purge_upd_exist(node, thr);
} else {
ut_ad(node->rec_type == TRX_UNDO_DEL_MARK_REC);
if (node->rec_type == TRX_UNDO_DEL_MARK_REC) {
row_purge_del_mark(node, thr);
} else if (updated_extern
|| node->rec_type == TRX_UNDO_UPD_EXIST_REC) {
row_purge_upd_exist_or_extern(node, thr);
}
if (node->found_clust) {

View File

@ -146,15 +146,17 @@ row_build_index_entry(
/***********************************************************************
An inverse function to dict_row_build_index_entry. Builds a row from a
record in a clustered index. */
record in a clustered index. NOTE that externally stored (often big)
fields are always copied to heap. */
dtuple_t*
row_build(
/*======*/
/* out, own: row built; see the NOTE below! */
ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS:
the former copies also the data fields to
heap as the latter only places pointers to
ulint type, /* in: ROW_COPY_POINTERS, ROW_COPY_DATA, or
ROW_COPY_ALSO_EXTERNALS,
the last two also copy the data fields to the
heap, whereas the first only places pointers to
data fields on the index page, and thus is
more efficient */
dict_index_t* index, /* in: clustered index */
@ -170,19 +172,19 @@ row_build(
{
dtuple_t* row;
dict_table_t* table;
ulint n_fields;
ulint i;
dict_col_t* col;
dfield_t* dfield;
ulint n_fields;
byte* field;
ulint len;
ulint row_len;
dict_col_t* col;
byte* buf;
ulint i;
ut_ad(index && rec && heap);
ut_ad(index->type & DICT_CLUSTERED);
if (type == ROW_COPY_DATA) {
if (type != ROW_COPY_POINTERS) {
/* Take a copy of rec to heap */
buf = mem_heap_alloc(heap, rec_get_size(rec));
rec = rec_copy(buf, rec);
@ -207,6 +209,13 @@ row_build(
dfield = dtuple_get_nth_field(row, dict_col_get_no(col));
field = rec_get_nth_field(rec, i, &len);
if (type == ROW_COPY_ALSO_EXTERNALS
&& rec_get_nth_field_extern_bit(rec, i)) {
field = btr_rec_copy_externally_stored_field(rec,
i, &len, heap);
}
dfield_set_data(dfield, field, len);
}
@ -215,6 +224,7 @@ row_build(
return(row);
}
#ifdef notdefined
/***********************************************************************
An inverse function to dict_row_build_index_entry. Builds a row from a
record in a clustered index. */
@ -229,7 +239,9 @@ row_build_to_tuple(
directly into this record, therefore,
the buffer page of this record must be
at least s-latched and the latch held
as long as the row dtuple is used! */
as long as the row dtuple is used!
NOTE 2: does not work with externally
stored fields! */
{
dict_table_t* table;
ulint n_fields;
@ -265,9 +277,11 @@ row_build_to_tuple(
ut_ad(dtuple_check_typed(row));
}
#endif
/***********************************************************************
Converts an index record to a typed data tuple. */
Converts an index record to a typed data tuple. NOTE that externally
stored (often big) fields are NOT copied to heap. */
dtuple_t*
row_rec_to_index_entry(

View File

@ -2036,7 +2036,8 @@ row_sel_store_mysql_rec(
which was described in prebuilt's
template */
{
mysql_row_templ_t* templ;
mysql_row_templ_t* templ;
mem_heap_t* extern_field_heap = NULL;
byte* data;
ulint len;
byte* blob_buf;
@ -2059,6 +2060,24 @@ row_sel_store_mysql_rec(
data = rec_get_nth_field(rec, templ->rec_field_no, &len);
if (rec_get_nth_field_extern_bit(rec, templ->rec_field_no)) {
/* Copy an externally stored field to the temporary
heap */
if (prebuilt->trx->has_search_latch) {
rw_lock_s_unlock(&btr_search_latch);
prebuilt->trx->has_search_latch = FALSE;
}
extern_field_heap = mem_heap_create(UNIV_PAGE_SIZE);
data = btr_rec_copy_externally_stored_field(rec,
templ->rec_field_no, &len,
extern_field_heap);
ut_a(len != UNIV_SQL_NULL);
}
if (len != UNIV_SQL_NULL) {
if (templ->type == DATA_BLOB) {
@ -2081,6 +2100,10 @@ row_sel_store_mysql_rec(
mysql_rec + templ->mysql_col_offset,
templ->mysql_col_len, data, len,
templ->type, templ->is_unsigned);
if (extern_field_heap) {
	/* Release the temporary copy of the externally stored
	field and forget the pointer, so that the same heap cannot
	be freed a second time if this point is reached again for
	a field that is not stored externally. */
	mem_heap_free(extern_field_heap);
	extern_field_heap = NULL;
}
} else {
mysql_rec[templ->mysql_null_byte_offset] |=
(byte) (templ->mysql_null_bit_mask);
@ -2450,6 +2473,7 @@ row_search_for_mysql(
ibool unique_search_from_clust_index = FALSE;
ibool mtr_has_extra_clust_latch = FALSE;
ibool moves_up = FALSE;
ulint cnt = 0;
mtr_t mtr;
ut_ad(index && pcur && search_tuple);
@ -2457,6 +2481,11 @@ row_search_for_mysql(
ut_ad(sync_thread_levels_empty_gen(FALSE));
/* printf("Match mode %lu\n search tuple ", match_mode);
dtuple_print(search_tuple);
printf("N tables locked %lu\n", trx->mysql_n_tables_locked);
*/
if (direction == 0) {
prebuilt->n_rows_fetched = 0;
prebuilt->n_fetch_cached = 0;
@ -2528,6 +2557,8 @@ row_search_for_mysql(
mtr_commit(&mtr);
/* printf("%s record not found 1\n", index->name); */
return(DB_RECORD_NOT_FOUND);
}
@ -2537,10 +2568,7 @@ row_search_for_mysql(
unique_search_from_clust_index = TRUE;
/* Disable this optimization (hence FALSE below) until
the hang of Peter Zaitsev has been tracked down */
if (FALSE && trx->mysql_n_tables_locked == 0
if (trx->mysql_n_tables_locked == 0
&& !prebuilt->sql_stat_start) {
/* This is a SELECT query done as a consistent read,
@ -2568,14 +2596,21 @@ row_search_for_mysql(
mtr_commit(&mtr);
/* printf("%s shortcut\n", index->name); */
return(DB_SUCCESS);
} else if (shortcut == SEL_EXHAUSTED) {
mtr_commit(&mtr);
/* printf("%s record not found 2\n",
index->name); */
return(DB_RECORD_NOT_FOUND);
}
mtr_commit(&mtr);
mtr_start(&mtr);
}
}
@ -2656,7 +2691,12 @@ rec_loop:
cons_read_requires_clust_rec = FALSE;
rec = btr_pcur_get_rec(pcur);
/*
printf("Using index %s cnt %lu ", index->name, cnt);
printf("; Page no %lu\n",
buf_frame_get_page_no(buf_frame_align(rec)));
rec_print(rec);
*/
if (rec == page_get_infimum_rec(buf_frame_align(rec))) {
/* The infimum record on a page cannot be in the result set,
@ -2697,12 +2737,15 @@ rec_loop:
/* Test if the index record matches completely to search_tuple
in prebuilt: if not, then we return with DB_RECORD_NOT_FOUND */
/* printf("Comparing rec and search tuple\n"); */
if (0 != cmp_dtuple_rec(search_tuple, rec)) {
btr_pcur_store_position(pcur, &mtr);
ret = DB_RECORD_NOT_FOUND;
/* printf("%s record not found 3\n", index->name); */
goto normal_return;
}
@ -2713,6 +2756,7 @@ rec_loop:
btr_pcur_store_position(pcur, &mtr);
ret = DB_RECORD_NOT_FOUND;
/* printf("%s record not found 4\n", index->name); */
goto normal_return;
}
@ -2881,6 +2925,8 @@ next_rec:
moved = sel_restore_position_for_mysql(BTR_SEARCH_LEAF, pcur,
moves_up, &mtr);
if (moved) {
cnt++;
goto rec_loop;
}
}
@ -2903,6 +2949,8 @@ next_rec:
goto normal_return;
}
cnt++;
goto rec_loop;
/*-------------------------------------------------------------*/
lock_wait_or_error:
@ -2928,7 +2976,9 @@ lock_wait_or_error:
goto rec_loop;
}
/* printf("Using index %s cnt %lu ret value %lu err\n", index->name,
cnt, err); */
return(err);
normal_return:
@ -2942,5 +2992,7 @@ normal_return:
ret = DB_SUCCESS;
}
/* printf("Using index %s cnt %lu ret value %lu\n", index->name,
cnt, err); */
return(ret);
}

View File

@ -242,11 +242,12 @@ row_undo_ins_parse_undo_rec(
dulint table_id;
ulint type;
ulint dummy;
ibool dummy_extern;
ut_ad(node && thr);
ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy, &undo_no,
&table_id);
ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy,
&dummy_extern, &undo_no, &table_id);
ut_ad(type == TRX_UNDO_INSERT_REC);
node->rec_type = type;
@ -284,9 +285,9 @@ row_undo_ins(
row_undo_ins_parse_undo_rec(node, thr);
if (node->table == NULL) {
found = FALSE;
found = FALSE;
} else {
found = row_undo_search_clust_to_pcur(node, thr);
found = row_undo_search_clust_to_pcur(node, thr);
}
if (!found) {

View File

@ -94,12 +94,12 @@ row_undo_mod_clust_low(
mtr_t* mtr, /* in: mtr */
ulint mode) /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
big_rec_t* dummy_big_rec;
dict_index_t* index;
btr_pcur_t* pcur;
btr_cur_t* btr_cur;
ulint err;
ibool success;
ibool do_remove;
index = dict_table_get_first_index(node->table);
@ -110,49 +110,80 @@ row_undo_mod_clust_low(
ut_ad(success);
if (mode == BTR_MODIFY_LEAF) {
err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG
| BTR_NO_UNDO_LOG_FLAG
| BTR_KEEP_SYS_FLAG,
btr_cur, node->update,
node->cmpl_info, thr, mtr);
} else {
ut_ad(mode == BTR_MODIFY_TREE);
err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG
| BTR_NO_UNDO_LOG_FLAG
| BTR_KEEP_SYS_FLAG,
btr_cur, &dummy_big_rec, node->update,
node->cmpl_info, thr, mtr);
}
return(err);
}
/***************************************************************
Removes a clustered index record after undo if possible. */
static
ulint
row_undo_mod_remove_clust_low(
/*==========================*/
/* out: DB_SUCCESS, DB_FAIL, or error code:
we may run out of file space */
undo_node_t* node, /* in: row undo node */
que_thr_t* thr, /* in: query thread */
mtr_t* mtr, /* in: mtr */
ulint mode) /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
btr_pcur_t* pcur;
btr_cur_t* btr_cur;
ulint err;
ibool success;
pcur = &(node->pcur);
btr_cur = btr_pcur_get_btr_cur(pcur);
success = btr_pcur_restore_position(mode, pcur, mtr);
if (!success) {
return(DB_SUCCESS);
}
/* Find out if we can remove the whole clustered index record */
if (node->rec_type == TRX_UNDO_UPD_DEL_REC
&& !row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) {
do_remove = TRUE;
/* Ok, we can remove */
} else {
do_remove = FALSE;
return(DB_SUCCESS);
}
if (mode == BTR_MODIFY_LEAF) {
success = btr_cur_optimistic_delete(btr_cur, mtr);
if (do_remove) {
success = btr_cur_optimistic_delete(btr_cur, mtr);
if (success) {
err = DB_SUCCESS;
} else {
err = DB_FAIL;
}
if (success) {
err = DB_SUCCESS;
} else {
err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG
| BTR_NO_UNDO_LOG_FLAG
| BTR_KEEP_SYS_FLAG,
btr_cur, node->update,
node->cmpl_info, thr, mtr);
err = DB_FAIL;
}
} else {
ut_ad(mode == BTR_MODIFY_TREE);
if (do_remove) {
btr_cur_pessimistic_delete(&err, FALSE, btr_cur, mtr);
btr_cur_pessimistic_delete(&err, FALSE, btr_cur, mtr);
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
and restart with more file space */
} else {
err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG
| BTR_NO_UNDO_LOG_FLAG
| BTR_KEEP_SYS_FLAG,
btr_cur, node->update,
node->cmpl_info, thr, mtr);
}
/* The delete operation may fail if we have little
file space left: TODO: easiest to crash the database
and restart with more file space */
}
return(err);
@ -204,10 +235,31 @@ row_undo_mod_clust(
err = row_undo_mod_clust_low(node, thr, &mtr, BTR_MODIFY_TREE);
}
node->state = UNDO_NODE_FETCH_NEXT;
btr_pcur_commit_specify_mtr(pcur, &mtr);
if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) {
mtr_start(&mtr);
err = row_undo_mod_remove_clust_low(node, thr, &mtr,
BTR_MODIFY_LEAF);
if (err != DB_SUCCESS) {
btr_pcur_commit_specify_mtr(pcur, &mtr);
/* We may have to modify tree structure: do a
pessimistic descent down the index tree */
mtr_start(&mtr);
err = row_undo_mod_remove_clust_low(node, thr, &mtr,
BTR_MODIFY_TREE);
}
btr_pcur_commit_specify_mtr(pcur, &mtr);
}
node->state = UNDO_NODE_FETCH_NEXT;
trx_undo_rec_release(node->trx, node->undo_no);
if (more_vers && err == DB_SUCCESS) {
@ -388,7 +440,6 @@ row_undo_mod_del_unmark_sec(
mem_free(err_buf);
} else {
btr_cur = btr_pcur_get_btr_cur(&pcur);
err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
@ -546,11 +597,12 @@ row_undo_mod_parse_undo_rec(
ulint info_bits;
ulint type;
ulint cmpl_info;
ibool dummy_extern;
ut_ad(node && thr);
ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info,
&undo_no, &table_id);
&dummy_extern, &undo_no, &table_id);
node->rec_type = type;
node->table = dict_table_get_on_id(table_id, thr_get_trx(thr));
@ -598,10 +650,9 @@ row_undo_mod(
row_undo_mod_parse_undo_rec(node, thr);
if (node->table == NULL) {
found = FALSE;
found = FALSE;
} else {
found = row_undo_search_clust_to_pcur(node, thr);
found = row_undo_search_clust_to_pcur(node, thr);
}
if (!found) {

View File

@ -124,6 +124,8 @@ row_undo_node_create(
undo->state = UNDO_NODE_FETCH_NEXT;
undo->trx = trx;
btr_pcur_init(&(undo->pcur));
undo->heap = mem_heap_create(256);
return(undo);
@ -303,6 +305,16 @@ row_undo_step(
if (err != DB_SUCCESS) {
/* SQL error detected */
fprintf(stderr, "InnoDB: Fatal error %lu in rollback.\n", err);
if (err == DB_OUT_OF_FILE_SPACE) {
fprintf(stderr,
"InnoDB: Error 13 means out of tablespace.\n"
"InnoDB: Consider increasing your tablespace.\n");
exit(1);
}
ut_a(0);
return(NULL);

View File

@ -90,8 +90,10 @@ upd_node_create(
node->in_mysql_interface = FALSE;
node->row = NULL;
node->ext_vec = NULL;
node->index = NULL;
node->update = NULL;
node->select = NULL;
node->heap = mem_heap_create(128);
@ -160,7 +162,8 @@ row_upd_index_entry_sys_field(
}
/***************************************************************
Returns TRUE if row update changes size of some field in index. */
Returns TRUE if row update changes size of some field in index
or if some field to be updated is stored externally in rec or update. */
ibool
row_upd_changes_field_size(
@ -199,6 +202,16 @@ row_upd_changes_field_size(
return(TRUE);
}
if (rec_get_nth_field_extern_bit(rec, upd_field->field_no)) {
return(TRUE);
}
if (upd_field->extern_storage) {
return(TRUE);
}
}
return(FALSE);
@ -441,6 +454,34 @@ row_upd_index_parse(
return(ptr);
}
/*******************************************************************
Tests whether the number i occurs among the first n_ext_vec entries of
ext_vec. A NULL ext_vec is treated like an empty array. */
UNIV_INLINE
ibool
upd_ext_vec_contains(
/*=================*/
				/* out: TRUE if i is in ext_vec */
	ulint*	ext_vec,	/* in: array of indexes or NULL */
	ulint	n_ext_vec,	/* in: number of numbers in ext_vec */
	ulint	i)		/* in: a number */
{
	ulint*	pos;
	ulint*	end;

	if (ext_vec == NULL) {

		return(FALSE);
	}

	/* Walk the array with a pointer instead of an index */
	end = ext_vec + n_ext_vec;

	for (pos = ext_vec; pos != end; pos++) {
		if (*pos == i) {

			return(TRUE);
		}
	}

	return(FALSE);
}
/*******************************************************************
Builds an update vector from those fields, excluding the roll ptr and
@ -454,6 +495,9 @@ row_upd_build_difference(
fields, excluding roll ptr and trx id */
dict_index_t* index, /* in: clustered index */
dtuple_t* entry, /* in: entry to insert */
ulint* ext_vec,/* in: array containing field numbers of
externally stored fields in entry, or NULL */
ulint n_ext_vec,/* in: number of fields in ext_vec */
rec_t* rec, /* in: clustered index record */
mem_heap_t* heap) /* in: memory heap from which allocated */
{
@ -480,16 +524,25 @@ row_upd_build_difference(
for (i = 0; i < dtuple_get_n_fields(entry); i++) {
data = rec_get_nth_field(rec, i, &len);
dfield = dtuple_get_nth_field(entry, i);
if ((i != trx_id_pos) && (i != roll_ptr_pos)
&& !dfield_data_is_equal(dfield, len, data)) {
if ((rec_get_nth_field_extern_bit(rec, i)
!= upd_ext_vec_contains(ext_vec, n_ext_vec, i))
|| ((i != trx_id_pos) && (i != roll_ptr_pos)
&& !dfield_data_is_equal(dfield, len, data))) {
upd_field = upd_get_nth_field(update, n_diff);
dfield_copy(&(upd_field->new_val), dfield);
upd_field_set_field_no(upd_field, i, index);
if (upd_ext_vec_contains(ext_vec, n_ext_vec, i)) {
upd_field->extern_storage = TRUE;
} else {
upd_field->extern_storage = FALSE;
}
n_diff++;
}
@ -630,9 +683,7 @@ row_upd_changes_ord_field(
}
/***************************************************************
Checks if an update vector changes an ordering field of an index record.
This function is fast if the update vector is short or the number of ordering
fields in the index is small. Otherwise, this can be quadratic. */
Checks if an update vector changes an ordering field of an index record. */
ibool
row_upd_changes_some_index_ord_field(
@ -642,19 +693,24 @@ row_upd_changes_some_index_ord_field(
dict_table_t* table, /* in: table */
upd_t* update) /* in: update vector for the row */
{
upd_field_t* upd_field;
dict_index_t* index;
ulint i;
index = dict_table_get_first_index(table);
while (index) {
if (row_upd_changes_ord_field(NULL, index, update)) {
for (i = 0; i < upd_get_n_fields(update); i++) {
return(TRUE);
upd_field = upd_get_nth_field(update, i);
if (dict_field_get_col(dict_index_get_nth_field(index,
upd_field->field_no))
->ord_part) {
return(TRUE);
}
index = dict_table_get_next_index(index);
}
}
return(FALSE);
}
@ -710,15 +766,17 @@ row_upd_eval_new_vals(
/***************************************************************
Stores to the heap the row on which the node->pcur is positioned. */
UNIV_INLINE
static
void
row_upd_store_row(
/*==============*/
upd_node_t* node) /* in: row update node */
{
dict_index_t* clust_index;
upd_t* update;
rec_t* rec;
ut_ad((node->pcur)->latch_mode != BTR_NO_LATCHES);
ut_ad(node->pcur->latch_mode != BTR_NO_LATCHES);
if (node->row != NULL) {
mem_heap_empty(node->heap);
@ -727,8 +785,20 @@ row_upd_store_row(
clust_index = dict_table_get_first_index(node->table);
node->row = row_build(ROW_COPY_DATA, clust_index,
btr_pcur_get_rec(node->pcur), node->heap);
rec = btr_pcur_get_rec(node->pcur);
node->row = row_build(ROW_COPY_DATA, clust_index, rec, node->heap);
node->ext_vec = mem_heap_alloc(node->heap, sizeof(ulint)
* rec_get_n_fields(rec));
if (node->is_delete) {
update = NULL;
} else {
update = node->update;
}
node->n_ext_vec = btr_push_update_extern_fields(node->ext_vec,
rec, update);
}
/***************************************************************
@ -812,7 +882,7 @@ row_upd_sec_index_entry(
row_upd_index_replace_new_col_vals(entry, index, node->update);
/* Insert new index entry */
err = row_ins_index_entry(index, entry, thr);
err = row_ins_index_entry(index, entry, NULL, 0, thr);
mem_heap_free(heap);
@ -870,6 +940,8 @@ row_upd_clust_rec_by_insert(
dict_table_t* table;
mem_heap_t* heap;
dtuple_t* entry;
ulint* ext_vec;
ulint n_ext_vec;
ulint err;
ut_ad(node);
@ -897,14 +969,18 @@ row_upd_clust_rec_by_insert(
heap = mem_heap_create(1024);
ext_vec = mem_heap_alloc(heap,
sizeof(ulint) * dtuple_get_n_fields(node->row));
n_ext_vec = 0;
entry = row_build_index_entry(node->row, index, heap);
row_upd_clust_index_replace_new_col_vals(entry, node->update);
row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id);
err = row_ins_index_entry(index, entry, thr);
err = row_ins_index_entry(index, entry, node->ext_vec,
node->n_ext_vec, thr);
mem_heap_free(heap);
return(err);
@ -924,6 +1000,7 @@ row_upd_clust_rec(
que_thr_t* thr, /* in: query thread */
mtr_t* mtr) /* in: mtr; gets committed here */
{
big_rec_t* big_rec = NULL;
btr_pcur_t* pcur;
btr_cur_t* btr_cur;
ulint err;
@ -973,9 +1050,24 @@ row_upd_clust_rec(
ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur)));
err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur,
node->update, node->cmpl_info, thr, mtr);
&big_rec, node->update,
node->cmpl_info, thr, mtr);
mtr_commit(mtr);
if (err == DB_SUCCESS && big_rec) {
mtr_start(mtr);
ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr));
err = btr_store_big_rec_extern_fields(index,
btr_cur_get_rec(btr_cur),
big_rec, mtr);
mtr_commit(mtr);
}
if (big_rec) {
dtuple_big_rec_free(big_rec);
}
return(err);
}
@ -1194,10 +1286,12 @@ row_upd(
ut_ad(node && thr);
if (node->in_mysql_interface) {
/* We do not get the cmpl_info value from the MySQL
interpreter: we must calculate it on the fly: */
if (row_upd_changes_some_index_ord_field(node->table,
if (node->is_delete ||
row_upd_changes_some_index_ord_field(node->table,
node->update)) {
node->cmpl_info = 0;
} else {
@ -1239,6 +1333,7 @@ function_exit:
if (node->row != NULL) {
mem_heap_empty(node->heap);
node->row = NULL;
node->n_ext_vec = 0;
}
node->state = UPD_NODE_UPDATE_CLUSTERED;

View File

@ -93,6 +93,8 @@ ulint srv_lock_wait_timeout = 1024 * 1024 * 1024;
char* srv_unix_file_flush_method_str = NULL;
ulint srv_unix_file_flush_method = 0;
ibool srv_use_doublewrite_buf = TRUE;
ibool srv_set_thread_priorities = TRUE;
int srv_query_thread_priority = 0;
/*-------------------------------------------*/
@ -109,6 +111,8 @@ ibool srv_print_buf_io = FALSE;
ibool srv_print_log_io = FALSE;
ibool srv_print_latch_waits = FALSE;
ibool srv_print_innodb_monitor = FALSE;
/* The parameters below are obsolete: */
ibool srv_print_parsed_sql = FALSE;
@ -1492,7 +1496,6 @@ srv_init(void)
slot = srv_mysql_table + i;
slot->in_use = FALSE;
slot->event = os_event_create(NULL);
slot->suspended = FALSE;
ut_a(slot->event);
}
@ -1661,7 +1664,6 @@ srv_suspend_mysql_thread(
slot->thr = thr;
os_event_reset(event);
slot->suspended = TRUE;
slot->suspend_time = ut_time();
@ -1693,27 +1695,6 @@ srv_suspend_mysql_thread(
return(FALSE);
}
/* Reserves a slot with srv_table_reserve_slot_for_mysql(), resets the
event of that slot, marks the slot as suspended and returns the event.
The whole sequence runs while holding kernel_mutex. */
os_event_t
srv_mysql_thread_event_get(void)
{
srv_slot_t* slot;
os_event_t event;
mutex_enter(&kernel_mutex);
slot = srv_table_reserve_slot_for_mysql();
event = slot->event;
/* Reset the event before it is handed to the caller */
os_event_reset(event);
slot->suspended = TRUE;
mutex_exit(&kernel_mutex);
return(event);
}
/************************************************************************
Releases a MySQL OS thread waiting for a lock to be released, if the
thread is already suspended. */
@ -1737,7 +1718,6 @@ srv_release_mysql_thread_if_suspended(
/* Found */
os_event_set(slot->event);
slot->suspended = FALSE;
return;
}
@ -1746,59 +1726,6 @@ srv_release_mysql_thread_if_suspended(
/* not found */
}
/* Scans srv_mysql_table for the first slot that is both in use and
suspended, clears its suspended flag and sets its event. Only the first
matching slot is handled. If no slot matches, ut_a(0) is executed
(presumably an always-failing assertion -- confirm against ut_a). */
void
srv_mysql_thread_release(void)
/*==========================*/
{
srv_slot_t* slot;
ulint i;
mutex_enter(&kernel_mutex);
for (i = 0; i < OS_THREAD_MAX_N; i++) {
slot = srv_mysql_table + i;
if (slot->in_use && slot->suspended) {
/* Found */
slot->suspended = FALSE;
/* The mutex is released before the event is set */
mutex_exit(&kernel_mutex);
os_event_set(slot->event);
return;
}
}
/* No suspended slot found; NOTE that kernel_mutex is still held here */
ut_a(0);
}
/* Scans srv_mysql_table for the in-use slot whose event equals the given
event and marks that slot as no longer in use. If no such slot exists,
ut_a(0) is executed (presumably an always-failing assertion -- confirm
against ut_a). */
void
srv_mysql_thread_slot_free(
/*==========================*/
os_event_t event) /* in: event identifying the slot to free */
{
srv_slot_t* slot;
ulint i;
mutex_enter(&kernel_mutex);
for (i = 0; i < OS_THREAD_MAX_N; i++) {
slot = srv_mysql_table + i;
if (slot->in_use && slot->event == event) {
/* Found */
slot->in_use = FALSE;
mutex_exit(&kernel_mutex);
return;
}
}
/* No matching slot found; NOTE that kernel_mutex is still held here */
ut_a(0);
}
/*************************************************************************
A thread which wakes up threads whose lock wait may have lasted too long. */
@ -1924,6 +1851,7 @@ srv_master_thread(
ulint i;
time_t last_flush_time;
time_t current_time;
time_t last_monitor_time;
UT_NOT_USED(arg);
@ -1936,6 +1864,8 @@ srv_master_thread(
mutex_exit(&kernel_mutex);
os_event_set(srv_sys->operational);
last_monitor_time = time(NULL);
loop:
mutex_enter(&kernel_mutex);
@ -1975,8 +1905,18 @@ loop:
while (n_pages_purged) {
/* TODO: replace this by a check if we are running
out of file space! */
if (srv_print_innodb_monitor) {
ut_print_timestamp(stdout);
printf(" InnoDB starts purge\n");
}
n_pages_purged = trx_purge();
if (srv_print_innodb_monitor) {
ut_print_timestamp(stdout);
printf(" InnoDB purged %lu pages\n", n_pages_purged);
}
current_time = time(NULL);
if (difftime(current_time, last_flush_time) > 1) {
@ -1986,14 +1926,40 @@ loop:
}
background_loop:
/*
sync_array_print_info(sync_primary_wait_array);
os_aio_print();
buf_print_io();
*/
/* In this loop we run background operations while the server
is quiet */
current_time = time(NULL);
if (srv_print_innodb_monitor
&& difftime(current_time, last_monitor_time) > 8) {
printf("================================\n");
last_monitor_time = time(NULL);
ut_print_timestamp(stdout);
printf(" INNODB MONITOR OUTPUT\n"
"================================\n");
printf("--------------------------\n"
"LOCKS HELD BY TRANSACTIONS\n"
"--------------------------\n");
lock_print_info();
printf("-----------------------------------------------\n"
"CURRENT SEMAPHORES RESERVED AND SEMAPHORE WAITS\n"
"-----------------------------------------------\n");
sync_print();
printf("CURRENT PENDING FILE I/O'S\n"
"--------------------------\n");
os_aio_print();
printf("-----------\n"
"BUFFER POOL\n"
"-----------\n");
buf_print_io();
printf("----------------------------\n"
"END OF INNODB MONITOR OUTPUT\n"
"============================\n");
}
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
mutex_exit(&kernel_mutex);
@ -2005,8 +1971,18 @@ background_loop:
/* The server has been quiet for a while: start running background
operations */
if (srv_print_innodb_monitor) {
ut_print_timestamp(stdout);
printf(" InnoDB starts purge\n");
}
n_pages_purged = trx_purge();
if (srv_print_innodb_monitor) {
ut_print_timestamp(stdout);
printf(" InnoDB purged %lu pages\n", n_pages_purged);
}
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
mutex_exit(&kernel_mutex);
@ -2014,8 +1990,18 @@ background_loop:
}
mutex_exit(&kernel_mutex);
if (srv_print_innodb_monitor) {
ut_print_timestamp(stdout);
printf(" InnoDB starts insert buffer merge\n");
}
n_bytes_merged = ibuf_contract(TRUE);
if (srv_print_innodb_monitor) {
ut_print_timestamp(stdout);
printf(" InnoDB merged %lu bytes\n", n_bytes_merged);
}
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
mutex_exit(&kernel_mutex);
@ -2023,7 +2009,7 @@ background_loop:
}
mutex_exit(&kernel_mutex);
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 20, ut_dulint_max);
n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
mutex_enter(&kernel_mutex);
if (srv_activity_count != old_activity_count) {
@ -2052,14 +2038,12 @@ background_loop:
/* mem_print_new_info();
*/
/* fsp_print(0); */
/* fprintf(stderr, "Validating tablespace\n");
/*
fsp_print(0);
fprintf(stderr, "Validating tablespace\n");
fsp_validate(0);
fprintf(stderr, "Validation ok\n");
*/
#ifdef UNIV_SEARCH_PERF_STAT
/* btr_search_print_info(); */
#endif

View File

@ -1,7 +1,7 @@
/************************************************************************
Starts the InnoDB database server
(c) 1996-2000 InnoDB Oy
(c) 1996-2000 Innobase Oy
Created 2/16/1996 Heikki Tuuri
*************************************************************************/
@ -203,8 +203,8 @@ open_or_create_log_file(
sprintf(name, "%s%s%lu", srv_log_group_home_dirs[k], "ib_logfile", i);
files[i] = os_file_create(name, OS_FILE_CREATE, OS_FILE_NORMAL, &ret);
files[i] = os_file_create(name, OS_FILE_CREATE, OS_FILE_NORMAL,
OS_LOG_FILE, &ret);
if (ret == FALSE) {
if (os_file_get_last_error() != OS_FILE_ALREADY_EXISTS) {
fprintf(stderr,
@ -214,7 +214,8 @@ open_or_create_log_file(
}
files[i] = os_file_create(
name, OS_FILE_OPEN, OS_FILE_AIO, &ret);
name, OS_FILE_OPEN, OS_FILE_AIO,
OS_LOG_FILE, &ret);
if (!ret) {
fprintf(stderr,
"InnoDB: Error in opening %s\n", name);
@ -239,7 +240,7 @@ open_or_create_log_file(
fprintf(stderr,
"InnoDB: Log file %s did not exist: new to be created\n",
name);
printf("InnoDB: Setting log file %s size to %lu\n",
fprintf(stderr, "InnoDB: Setting log file %s size to %lu\n",
name, UNIV_PAGE_SIZE * srv_log_file_size);
ret = os_file_set_size(name, files[i],
@ -330,27 +331,28 @@ open_or_create_data_files(
sprintf(name, "%s%s", srv_data_home, srv_data_file_names[i]);
if (srv_data_file_is_raw_partition[i] == 0) {
files[i] = os_file_create(name, OS_FILE_CREATE,
OS_FILE_NORMAL, OS_DATA_FILE, &ret);
files[i] = os_file_create(name, OS_FILE_CREATE,
OS_FILE_NORMAL, &ret);
} else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
ret = FALSE;
} else if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
if (srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) {
/* The partition is opened, not created; then it is
written over */
files[i] = os_file_create(
name, OS_FILE_OPEN, OS_FILE_NORMAL, &ret);
if (!ret) {
files[i] = os_file_create(
name, OS_FILE_OPEN, OS_FILE_NORMAL,
OS_DATA_FILE, &ret);
if (!ret) {
fprintf(stderr,
"InnoDB: Error in opening %s\n", name);
return(DB_ERROR);
}
}
} else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) {
ret = FALSE;
}
if (ret == FALSE) {
if (srv_data_file_is_raw_partition[i] == 0
if (srv_data_file_is_raw_partition[i] != SRV_OLD_RAW
&& os_file_get_last_error() !=
OS_FILE_ALREADY_EXISTS) {
fprintf(stderr,
@ -370,8 +372,8 @@ open_or_create_data_files(
}
files[i] = os_file_create(
name, OS_FILE_OPEN, OS_FILE_NORMAL, &ret);
name, OS_FILE_OPEN, OS_FILE_NORMAL,
OS_DATA_FILE, &ret);
if (!ret) {
fprintf(stderr,
"InnoDB: Error in opening %s\n", name);
@ -379,18 +381,21 @@ open_or_create_data_files(
return(DB_ERROR);
}
ret = os_file_get_size(files[i], &size, &size_high);
ut_a(ret);
if (srv_data_file_is_raw_partition[i] != SRV_OLD_RAW) {
ret = os_file_get_size(files[i], &size,
&size_high);
ut_a(ret);
if (srv_data_file_is_raw_partition[i] == 0
&& (size != UNIV_PAGE_SIZE * srv_data_file_sizes[i]
|| size_high != 0)) {
fprintf(stderr,
if (size !=
UNIV_PAGE_SIZE * srv_data_file_sizes[i]
|| size_high != 0) {
fprintf(stderr,
"InnoDB: Error: data file %s is of different size\n"
"InnoDB: than specified in the .cnf file!\n", name);
return(DB_ERROR);
return(DB_ERROR);
}
}
fil_read_flushed_lsn_and_arch_log_no(files[i],
@ -403,7 +408,8 @@ open_or_create_data_files(
if (i > 0) {
fprintf(stderr,
"InnoDB: Data file %s did not exist: new to be created\n", name);
"InnoDB: Data file %s did not exist: new to be created\n",
name);
} else {
fprintf(stderr,
"InnoDB: The first specified data file %s did not exist:\n"
@ -411,10 +417,10 @@ open_or_create_data_files(
*create_new_db = TRUE;
}
printf("InnoDB: Setting file %s size to %lu\n",
fprintf(stderr, "InnoDB: Setting file %s size to %lu\n",
name, UNIV_PAGE_SIZE * srv_data_file_sizes[i]);
printf(
fprintf(stderr,
"InnoDB: Database physically writes the file full: wait...\n");
ret = os_file_set_size(name, files[i],
@ -555,19 +561,22 @@ innobase_start_or_create_for_mysql(void)
srv_startup_is_before_trx_rollback_phase = TRUE;
if (0 == ut_strcmp(srv_unix_file_flush_method_str, "fdatasync")) {
srv_unix_file_flush_method = SRV_UNIX_FDATASYNC;
srv_unix_file_flush_method = SRV_UNIX_FDATASYNC;
} else if (0 == ut_strcmp(srv_unix_file_flush_method_str, "O_DSYNC")) {
srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
srv_unix_file_flush_method = SRV_UNIX_O_DSYNC;
} else if (0 == ut_strcmp(srv_unix_file_flush_method_str,
"littlesync")) {
srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
} else if (0 == ut_strcmp(srv_unix_file_flush_method_str, "nosync")) {
srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
} else {
fprintf(stderr,
"InnoDB: Unrecognized value for innodb_unix_file_flush_method\n");
srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC;
return(DB_ERROR);
} else if (0 == ut_strcmp(srv_unix_file_flush_method_str, "nosync")) {
srv_unix_file_flush_method = SRV_UNIX_NOSYNC;
} else {
fprintf(stderr,
"InnoDB: Unrecognized value %s for innodb_flush_method\n",
srv_unix_file_flush_method_str);
return(DB_ERROR);
}
/*
@ -593,14 +602,15 @@ innobase_start_or_create_for_mysql(void)
#ifdef __WIN__
if (os_get_os_version() == OS_WIN95
|| os_get_os_version() == OS_WIN31) {
/* On Win 95, 98, ME, and Win32 subsystem for Windows 3.1 use
simulated aio */
os_aio_use_native_aio = FALSE;
srv_n_file_io_threads = 4;
/* On Win 95, 98, ME, and Win32 subsystem for Windows 3.1 use
simulated aio */
os_aio_use_native_aio = FALSE;
srv_n_file_io_threads = 4;
} else {
/* On NT and Win 2000 always use aio */
os_aio_use_native_aio = TRUE;
/* On NT and Win 2000 always use aio */
os_aio_use_native_aio = TRUE;
}
#endif
if (!os_aio_use_native_aio) {
@ -652,14 +662,21 @@ innobase_start_or_create_for_mysql(void)
sum_of_new_sizes = 0;
for (i = 0; i < srv_n_data_files; i++) {
sum_of_new_sizes += srv_data_file_sizes[i];
if (srv_data_file_sizes[i] >= 262144) {
fprintf(stderr,
"InnoDB: Error: file size must be < 4 GB, or on some OS's < 2 GB\n");
return(DB_ERROR);
}
sum_of_new_sizes += srv_data_file_sizes[i];
}
if (sum_of_new_sizes < 640) {
fprintf(stderr,
fprintf(stderr,
"InnoDB: Error: tablespace size must be at least 10 MB\n");
return(DB_ERROR);
return(DB_ERROR);
}
err = open_or_create_data_files(&create_new_db,
@ -673,6 +690,15 @@ innobase_start_or_create_for_mysql(void)
return((int) err);
}
if (!create_new_db) {
/* If we are using the doublewrite method, we will
check if there are half-written pages in data files,
and restore them from the doublewrite buffer if
possible */
trx_sys_doublewrite_restore_corrupt_pages();
}
srv_normalize_path_for_win(srv_arch_dir);
srv_arch_dir = srv_add_path_separator_if_needed(srv_arch_dir);
@ -742,7 +768,6 @@ innobase_start_or_create_for_mysql(void)
mutex_exit(&(log_sys->mutex));
}
/* mutex_create(&row_mysql_thread_mutex); */
sess_sys_init_at_db_start();
if (create_new_db) {
@ -834,7 +859,7 @@ innobase_start_or_create_for_mysql(void)
}
if (srv_measure_contention) {
/* os_thread_create(&test_measure_cont, NULL, thread_ids +
/* os_thread_create(&test_measure_cont, NULL, thread_ids +
SRV_MAX_N_IO_THREADS); */
}
@ -849,16 +874,20 @@ innobase_start_or_create_for_mysql(void)
/* Create the thread which watches the timeouts for lock waits */
os_thread_create(&srv_lock_timeout_monitor_thread, NULL,
thread_ids + 2 + SRV_MAX_N_IO_THREADS);
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: Started\n");
srv_was_started = TRUE;
srv_is_being_started = FALSE;
sync_order_checks_on = TRUE;
if (srv_use_doublewrite_buf && trx_doublewrite == NULL) {
trx_sys_create_doublewrite_buf();
}
/* buf_debug_prints = TRUE; */
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: Started\n");
return((int) DB_SUCCESS);
}

View File

@ -810,11 +810,10 @@ rw_lock_print(
ulint count = 0;
rw_lock_debug_t* info;
printf("----------------------------------------------\n");
printf("-------------------------------------------------\n");
printf("RW-LOCK INFO\n");
printf("RW-LOCK: %lx ", (ulint)lock);
mutex_enter(&(lock->mutex));
if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
|| (rw_lock_get_reader_count(lock) != 0)
|| (rw_lock_get_waiters(lock) != 0)) {
@ -831,8 +830,6 @@ rw_lock_print(
info = UT_LIST_GET_NEXT(list, info);
}
}
mutex_exit(&(lock->mutex));
#endif
}

View File

@ -158,7 +158,7 @@ struct sync_thread_struct{
};
/* Number of slots reserved for each OS thread in the sync level array */
#define SYNC_THREAD_N_LEVELS 256
#define SYNC_THREAD_N_LEVELS 10000
struct sync_level_struct{
void* latch; /* pointer to a mutex or an rw-lock; NULL means that
@ -768,6 +768,9 @@ sync_thread_levels_g(
thread */
ulint limit) /* in: level limit */
{
char* file_name;
ulint line;
ulint thread_id;
sync_level_t* slot;
rw_lock_t* lock;
mutex_t* mutex;
@ -783,8 +786,29 @@ sync_thread_levels_g(
lock = slot->latch;
mutex = slot->latch;
ut_error;
printf(
"InnoDB error: sync levels should be > %lu but a level is %lu\n",
limit, slot->level);
if (mutex->magic_n == MUTEX_MAGIC_N) {
printf("Mutex created at %s %lu\n", &(mutex->cfile_name),
mutex->cline);
if (mutex_get_lock_word(mutex) != 0) {
mutex_get_debug_info(mutex,
&file_name, &line, &thread_id);
printf("InnoDB: Locked mutex: addr %lx thread %ld file %s line %ld\n",
(ulint)mutex, thread_id,
file_name, line);
} else {
printf("Not locked\n");
}
} else {
rw_lock_print(lock);
}
return(FALSE);
}
}
@ -973,6 +997,8 @@ sync_thread_add_level(
ut_a(sync_thread_levels_g(array, SYNC_ANY_LATCH));
} else if (level == SYNC_TRX_SYS_HEADER) {
ut_a(sync_thread_levels_contain(array, SYNC_KERNEL));
} else if (level == SYNC_DOUBLEWRITE) {
ut_a(sync_thread_levels_g(array, SYNC_DOUBLEWRITE));
} else if (level == SYNC_BUF_BLOCK) {
ut_a((sync_thread_levels_contain(array, SYNC_BUF_POOL)
&& sync_thread_levels_g(array, SYNC_BUF_BLOCK - 1))
@ -1000,6 +1026,8 @@ sync_thread_add_level(
} else if (level == SYNC_FSP) {
ut_a(sync_thread_levels_contain(array, SYNC_FSP)
|| sync_thread_levels_g(array, SYNC_FSP));
} else if (level == SYNC_EXTERN_STORAGE) {
ut_a(TRUE);
} else if (level == SYNC_TRX_UNDO_PAGE) {
ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
|| sync_thread_levels_contain(array, SYNC_RSEG)
@ -1221,10 +1249,10 @@ void
sync_print(void)
/*============*/
{
printf("SYNC INFO:------------------------------------------\n");
printf("SYNC INFO:\n");
mutex_list_print_info();
rw_lock_list_print_info();
sync_array_print_info(sync_primary_wait_array);
sync_print_wait_info();
printf("----------------------------------------------------\n");
printf("-----------------------------------------------------\n");
}

View File

@ -678,6 +678,8 @@ trx_purge_choose_next_log(void)
rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
min_trx_no = ut_dulint_max;
min_rseg = NULL;
while (rseg) {
@ -692,6 +694,9 @@ trx_purge_choose_next_log(void)
min_rseg = rseg;
min_trx_no = rseg->last_trx_no;
space = rseg->space;
ut_a(space == 0); /* We assume in purge of
externally stored fields
that space id == 0 */
page_no = rseg->last_page_no;
offset = rseg->last_offset;
}
@ -820,6 +825,10 @@ trx_purge_get_next_rec(
}
cmpl_info = trx_undo_rec_get_cmpl_info(rec2);
if (trx_undo_rec_get_extern_storage(rec2)) {
break;
}
if ((type == TRX_UNDO_UPD_EXIST_REC)
&& !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {

View File

@ -292,6 +292,8 @@ trx_undo_rec_get_pars(
TRX_UNDO_INSERT_REC, ... */
ulint* cmpl_info, /* out: compiler info, relevant only
for update type records */
ibool* updated_extern, /* out: TRUE if we updated an
externally stored field */
dulint* undo_no, /* out: undo log record number */
dulint* table_id) /* out: table id */
{
@ -303,7 +305,14 @@ trx_undo_rec_get_pars(
type_cmpl = mach_read_from_1(ptr);
ptr++;
if (type_cmpl & TRX_UNDO_UPD_EXTERN) {
*updated_extern = TRUE;
type_cmpl -= TRX_UNDO_UPD_EXTERN;
} else {
*updated_extern = FALSE;
}
*type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1);
*cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT;
@ -336,7 +345,11 @@ trx_undo_rec_get_col_val(
*field = ptr;
if (*len != UNIV_SQL_NULL) {
ptr += *len;
if (*len >= UNIV_EXTERN_STORAGE_FIELD) {
ptr += (*len - UNIV_EXTERN_STORAGE_FIELD);
} else {
ptr += *len;
}
}
return(ptr);
@ -452,6 +465,7 @@ trx_undo_page_report_modify(
ulint col_no;
byte* old_ptr;
ulint type_cmpl;
byte* type_cmpl_ptr;
ulint i;
ut_ad(index->type & DICT_CLUSTERED);
@ -491,6 +505,8 @@ trx_undo_page_report_modify(
mach_write_to_1(ptr, type_cmpl);
type_cmpl_ptr = ptr;
ptr++;
len = mach_dulint_write_much_compressed(ptr, trx->undo_no);
ptr += len;
@ -577,7 +593,23 @@ trx_undo_page_report_modify(
return(0);
}
len = mach_write_compressed(ptr, flen);
if (rec_get_nth_field_extern_bit(rec, pos)) {
/* If a field has external storage, we add to
flen the flag */
len = mach_write_compressed(ptr,
UNIV_EXTERN_STORAGE_FIELD + flen);
/* Notify purge that it eventually has to free the old
externally stored field */
(trx->update_undo)->del_marks = TRUE;
*type_cmpl_ptr = *type_cmpl_ptr | TRX_UNDO_UPD_EXTERN;
} else {
len = mach_write_compressed(ptr, flen);
}
ptr += len;
if (flen != UNIV_SQL_NULL) {
@ -825,6 +857,13 @@ trx_undo_update_rec_get_update(
upd_field_set_field_no(upd_field, field_no, index);
if (len != UNIV_SQL_NULL && len >= UNIV_EXTERN_STORAGE_FIELD) {
upd_field->extern_storage = TRUE;
len -= UNIV_EXTERN_STORAGE_FIELD;
}
dfield_set_data(&(upd_field->new_val), field, len);
}
@ -1222,8 +1261,10 @@ trx_undo_prev_version_build(
byte* ptr;
ulint info_bits;
ulint cmpl_info;
ibool dummy_extern;
byte* buf;
ulint err;
ulint i;
ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED));
ut_ad(mtr_memo_contains(index_mtr, buf_block_align(index_rec),
@ -1252,8 +1293,9 @@ trx_undo_prev_version_build(
return(err);
}
ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info, &undo_no,
&table_id);
ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info,
&dummy_extern, &undo_no, &table_id);
ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr,
&info_bits);
ptr = trx_undo_rec_skip_row_ref(ptr, index);
@ -1278,5 +1320,15 @@ trx_undo_prev_version_build(
row_upd_rec_in_place(*old_vers, update);
}
for (i = 0; i < upd_get_n_fields(update); i++) {
if (upd_get_nth_field(update, i)->extern_storage) {
rec_set_nth_field_extern_bit(*old_vers,
upd_get_nth_field(update, i)->field_no,
TRUE, NULL);
}
}
return(DB_SUCCESS);
}

View File

@ -19,9 +19,326 @@ Created 3/26/1996 Heikki Tuuri
#include "trx0undo.h"
#include "srv0srv.h"
#include "trx0purge.h"
#include "log0log.h"
/* The transaction system */
trx_sys_t* trx_sys = NULL;
trx_sys_t* trx_sys = NULL;
trx_doublewrite_t* trx_doublewrite = NULL;
/********************************************************************
Creates or initializes the in-memory doublewrite buffer struct at a
database start. The positions of the two doublewrite blocks are read from
the doublewrite header passed in; the page-aligned write buffer and the
array of buffer block pointers are allocated here. */
static
void
trx_doublewrite_init(
/*=================*/
	byte*	doublewrite)	/* in: pointer to the doublewrite buf
				header on trx sys page */
{
	/* Total number of pages in the two doublewrite blocks */
	ulint	n_buf_pages;

	n_buf_pages = 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;

	trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t));

	mutex_create(&(trx_doublewrite->mutex));
	mutex_set_level(&(trx_doublewrite->mutex), SYNC_DOUBLEWRITE);

	trx_doublewrite->first_free = 0;

	/* Page numbers where the two doublewrite blocks start */
	trx_doublewrite->block1
		= mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK1);
	trx_doublewrite->block2
		= mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);

	/* One extra page is allocated so that the start of the write
	buffer can be aligned to a page boundary */
	trx_doublewrite->write_buf_unaligned
		= ut_malloc((1 + n_buf_pages) * UNIV_PAGE_SIZE);
	trx_doublewrite->write_buf
		= ut_align(trx_doublewrite->write_buf_unaligned,
							UNIV_PAGE_SIZE);

	trx_doublewrite->buf_block_arr
		= mem_alloc(n_buf_pages * sizeof(void*));
}
/********************************************************************
Creates the doublewrite buffer at a database start. The header of the
doublewrite buffer is placed on the trx system header page.

If the in-memory doublewrite struct already exists, does nothing. If the
magic number is found in the header on the trx sys page, only initializes
the in-memory struct from it. Otherwise allocates a file segment and the
pages of the two doublewrite blocks, writes the header (each field also to
its repeat position), makes a checkpoint, and starts again from the top so
that the freshly written header is read in. Calls exit(1) if the buffer
pool or the tablespace is too small. */
void
trx_sys_create_doublewrite_buf(void)
/*================================*/
{
	page_t*	page;
	page_t*	page2;
	page_t*	new_page;
	byte*	doublewrite;
	byte*	fseg_header;
	ulint	page_no;
	ulint	prev_page_no;
	ulint	i;
	mtr_t	mtr;

	if (trx_doublewrite) {
		/* Already inited */

		return;
	}

start_again:
	mtr_start(&mtr);

	page = buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr);
	buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK);

	doublewrite = page + TRX_SYS_DOUBLEWRITE;

	if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
					== TRX_SYS_DOUBLEWRITE_MAGIC_N) {

		/* The doublewrite buffer has already been created:
		just read in some numbers */

		trx_doublewrite_init(doublewrite);

		mtr_commit(&mtr);
	} else {
		fprintf(stderr,
"InnoDB: Doublewrite buffer not found: creating new\n");

		if (buf_pool_get_curr_size() <
					(2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
						+ FSP_EXTENT_SIZE / 2 + 100)
					* UNIV_PAGE_SIZE) {
			fprintf(stderr,
"InnoDB: Cannot create doublewrite buffer: you must\n"
"InnoDB: increase your buffer pool size.\n"
"InnoDB: Cannot continue operation.\n");

			exit(1);
		}

		page2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
			TRX_SYS_DOUBLEWRITE + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);

		/* Check the result before page2 is used for anything, so
		that a NULL frame is never handed on to the latch level
		bookkeeping below */

		if (page2 == NULL) {
			fprintf(stderr,
"InnoDB: Cannot create doublewrite buffer: you must\n"
"InnoDB: increase your tablespace size.\n"
"InnoDB: Cannot continue operation.\n");

			/* We exit without committing the mtr to prevent
			its modifications to the database getting to disk */

			exit(1);
		}

		/* fseg_create acquires a second latch on the page,
		therefore we must declare it: */

		buf_page_dbg_add_level(page2, SYNC_NO_ORDER_CHECK);

		fseg_header = page + TRX_SYS_DOUBLEWRITE
						+ TRX_SYS_DOUBLEWRITE_FSEG;
		prev_page_no = 0;

		for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
						+ FSP_EXTENT_SIZE / 2; i++) {
			page_no = fseg_alloc_free_page(fseg_header,
							prev_page_no + 1,
							FSP_UP, &mtr);
			if (page_no == FIL_NULL) {
				fprintf(stderr,
"InnoDB: Cannot create doublewrite buffer: you must\n"
"InnoDB: increase your tablespace size.\n"
"InnoDB: Cannot continue operation.\n");

				exit(1);
			}

			/* We read the allocated pages to the buffer pool;
			when they are written to disk in a flush, the space
			id and page number fields are also written to the
			pages. When we at database startup read pages
			from the doublewrite buffer, we know that if the
			space id and page number in them are the same as
			the page position in the tablespace, then the page
			has not been written to in doublewrite. */

			new_page = buf_page_get(TRX_SYS_SPACE, page_no,
							RW_X_LATCH, &mtr);
			buf_page_dbg_add_level(new_page, SYNC_NO_ORDER_CHECK);

			/* Make a dummy change to the page to ensure it will
			be written to disk in a flush */

			mlog_write_ulint(new_page + FIL_PAGE_DATA,
					TRX_SYS_DOUBLEWRITE_MAGIC_N,
					MLOG_4BYTES, &mtr);

			/* The first FSP_EXTENT_SIZE / 2 allocated pages are
			not part of either block; the start page of each
			block is recorded in the header and in its repeat */

			if (i == FSP_EXTENT_SIZE / 2) {
				mlog_write_ulint(doublewrite
						+ TRX_SYS_DOUBLEWRITE_BLOCK1,
						page_no, MLOG_4BYTES, &mtr);
				mlog_write_ulint(doublewrite
						+ TRX_SYS_DOUBLEWRITE_REPEAT
						+ TRX_SYS_DOUBLEWRITE_BLOCK1,
						page_no, MLOG_4BYTES, &mtr);
			} else if (i == FSP_EXTENT_SIZE / 2
					+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
				mlog_write_ulint(doublewrite
						+ TRX_SYS_DOUBLEWRITE_BLOCK2,
						page_no, MLOG_4BYTES, &mtr);
				mlog_write_ulint(doublewrite
						+ TRX_SYS_DOUBLEWRITE_REPEAT
						+ TRX_SYS_DOUBLEWRITE_BLOCK2,
						page_no, MLOG_4BYTES, &mtr);
			} else if (i > FSP_EXTENT_SIZE / 2) {
				/* Pages inside a block must be consecutive */
				ut_a(page_no == prev_page_no + 1);
			}

			prev_page_no = page_no;
		}

		/* The magic number marks the buffer as created; it is
		written last, also to the repeat position */

		mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
				TRX_SYS_DOUBLEWRITE_MAGIC_N, MLOG_4BYTES, &mtr);
		mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
						+ TRX_SYS_DOUBLEWRITE_REPEAT,
				TRX_SYS_DOUBLEWRITE_MAGIC_N, MLOG_4BYTES, &mtr);
		mtr_commit(&mtr);

		/* Flush the modified pages to disk and make a checkpoint */
		log_make_checkpoint_at(ut_dulint_max, TRUE);

		fprintf(stderr, "InnoDB: Doublewrite buffer created\n");

		goto start_again;
	}
}
/********************************************************************
At a database startup uses a possible doublewrite buffer to restore
half-written pages in the data files. Reads the trx sys page directly
with file i/o; if the doublewrite magic number is not found there, does
nothing. Otherwise every page stored in the two doublewrite blocks is
compared against the page at its intended position in the data files,
and a corrupt data file page is overwritten with the copy from the
doublewrite buffer. Calls exit(1) if both copies are corrupt. */
void
trx_sys_doublewrite_restore_corrupt_pages(void)
/*===========================================*/
{
	byte*	dblwr_buf;
	byte*	read_buf;
	byte*	read_buf_mem;
	ulint	block1;
	ulint	block2;
	byte*	page;
	byte*	doublewrite;
	ulint	space_id;
	ulint	page_no;
	ulint	i;

	/* We do the file i/o past the buffer pool */

	read_buf_mem = ut_malloc(2 * UNIV_PAGE_SIZE);
	read_buf = ut_align(read_buf_mem, UNIV_PAGE_SIZE);

	/* Read the trx sys header to check if we are using the
	doublewrite buffer */

	fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, TRX_SYS_PAGE_NO, 0,
					UNIV_PAGE_SIZE, read_buf, NULL);
	doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;

	if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
					!= TRX_SYS_DOUBLEWRITE_MAGIC_N) {

		/* No doublewrite buffer has been created: nothing to do */

		goto leave_func;
	}

	/* The doublewrite buffer has been created */

	trx_doublewrite_init(doublewrite);

	block1 = trx_doublewrite->block1;
	block2 = trx_doublewrite->block2;
	dblwr_buf = trx_doublewrite->write_buf;

	/* Read the pages from the doublewrite buffer to memory: the first
	block to the start of the buffer, the second right after it */

	fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, block1, 0,
			TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
			dblwr_buf, NULL);
	fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, block2, 0,
			TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
			dblwr_buf
			+ TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
			NULL);

	/* Check if any of these pages is half-written in data files, in the
	intended position */

	for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {

		page = dblwr_buf + i * UNIV_PAGE_SIZE;

		space_id = mach_read_from_4(page + FIL_PAGE_SPACE);
		page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);

		if (!fil_check_adress_in_tablespace(space_id, page_no)) {
			fprintf(stderr,
"InnoDB: Warning: an inconsistent page in the doublewrite buffer\n"
"InnoDB: space id %lu page number %lu, %lu'th page in dblwr buf.\n",
				space_id, page_no, i);

			continue;
		}

		if (space_id == TRX_SYS_SPACE
		    && ((page_no >= block1
			 && page_no
				< block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
			|| (page_no >= block2
			    && page_no
				< block2 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE))) {

			/* It is an unwritten doublewrite buffer page:
			do nothing */

			continue;
		}

		/* Read in the actual page from the data files */

		fil_io(OS_FILE_READ, TRUE, space_id, page_no, 0,
					UNIV_PAGE_SIZE, read_buf, NULL);

		/* Check if the page is corrupt */

		if (!buf_page_is_corrupted(read_buf)) {

			continue;
		}

		fprintf(stderr,
"InnoDB: Warning: database page corruption or a failed\n"
"InnoDB: file read of page %lu.\n", page_no);
		fprintf(stderr,
"InnoDB: Trying to recover it from the doublewrite buffer.\n");

		if (buf_page_is_corrupted(page)) {
			fprintf(stderr,
"InnoDB: Also the page in the doublewrite buffer is corrupt.\n"
"InnoDB: Cannot continue operation.\n");

			exit(1);
		}

		/* Write the good page from the doublewrite buffer to the
		intended position */

		fil_io(OS_FILE_WRITE, TRUE, space_id, page_no, 0,
					UNIV_PAGE_SIZE, page, NULL);
		fprintf(stderr,
"InnoDB: Recovered the page from the doublewrite buffer.\n");
	}

	fil_flush_file_spaces(FIL_TABLESPACE);

leave_func:
	ut_free(read_buf_mem);
}
/********************************************************************
Checks that trx is in the trx list. */

View File

@ -55,7 +55,8 @@ mysysobjects1 = my_init.lo my_static.lo my_malloc.lo my_realloc.lo \
mf_loadpath.lo my_pthread.lo my_thr_init.lo \
thr_mutex.lo mulalloc.lo string.lo default.lo \
my_compress.lo array.lo my_once.lo list.lo my_net.lo \
charset.lo hash.lo
charset.lo hash.lo mf_iocache.lo my_seek.lo \
my_pread.lo mf_cache.lo
# Not needed in the minimum library
mysysobjects2 = getopt.lo getopt1.lo getvar.lo my_lib.lo
mysysobjects = $(mysysobjects1) $(mysysobjects2)

View File

@ -293,7 +293,7 @@ HANDLE create_named_pipe(NET *net, uint connect_timeout, char **arg_host,
** or packet is an error message
*****************************************************************************/
static uint
uint
net_safe_read(MYSQL *mysql)
{
NET *net= &mysql->net;
@ -415,7 +415,7 @@ static void free_rows(MYSQL_DATA *cur)
}
static int
int
simple_command(MYSQL *mysql,enum enum_server_command command, const char *arg,
uint length, my_bool skipp_check)
{

View File

@ -1798,6 +1798,9 @@ compiler."
*-*-cygwin* | *-*-mingw* | *-*-os2* | *-*-beos*)
# these systems don't actually have a c library (as such)!
;;
*-*-freebsd*)
#FreeBSD needs to handle -lc and -lc_r itself
;;
*-*-rhapsody*)
# rhapsody is a little odd...
deplibs="$deplibs -framework System"

View File

@ -206,7 +206,7 @@ static struct option long_options[] =
static void print_version(void)
{
printf("%s Ver 1.48 for %s at %s\n",my_progname,SYSTEM_TYPE,
printf("%s Ver 1.49 for %s at %s\n",my_progname,SYSTEM_TYPE,
MACHINE_TYPE);
}
@ -468,7 +468,7 @@ static void get_options(register int *argc,register char ***argv)
if ((check_param.testflag & T_READONLY) &&
(check_param.testflag &
(T_REP_BY_SORT | T_REP | T_STATISTICS | T_AUTO_INC |
T_SORT_RECORDS | T_SORT_INDEX)))
T_SORT_RECORDS | T_SORT_INDEX | T_FORCE_CREATE)))
{
VOID(fprintf(stderr,
"%s: Can't use --readonly when repairing or sorting\n",

View File

@ -139,6 +139,8 @@ t1 1 level 1 level A 3 NULL NULL
gesuchnr benutzer_id
1 1
2 1
id x
1 2
Table Op Msg_type Msg_text
test.t1 optimize status OK
a

View File

@ -111,6 +111,31 @@ DateOfAction TransactionID
member_id nickname voornaam
1
2
gid sid uid
104620 5 15
103867 5 27
103962 5 27
104619 5 75
104505 5 117
103853 5 250
gid sid uid
104620 5 15
103867 5 27
103962 5 27
104619 5 75
104505 5 117
103853 5 250
table type possible_keys key key_len ref rows Extra
t1 index PRIMARY PRIMARY 4 NULL 6 Using index
t2 eq_ref PRIMARY,uid PRIMARY 4 t1.gid 1
t3 eq_ref PRIMARY PRIMARY 2 t2.uid 1 where used; Using index
table type possible_keys key key_len ref rows Extra
t1 index PRIMARY PRIMARY 4 NULL 6 Using index
t3 eq_ref PRIMARY PRIMARY 2 t1.gid 1 where used
table type possible_keys key key_len ref rows Extra
t1 index PRIMARY PRIMARY 4 NULL 6 Using index; Using temporary; Using filesort
t2 eq_ref PRIMARY,uid PRIMARY 4 t1.gid 1
t3 eq_ref PRIMARY PRIMARY 2 t2.uid 1 where used; Using index
table type possible_keys key key_len ref rows Extra
t1 range a a 20 NULL 2 where used; Using index
a b c

View File

@ -65,6 +65,13 @@ replace into t1 (gesuchnr,benutzer_id) values (1,1);
select * from t1;
drop table t1;
# test for bug in replace with secondary key
create table t1 (id int not null primary key, x int not null, key (x)) type=bdb;
insert into t1 (id, x) values (1, 1);
replace into t1 (id, x) values (1, 2);
select * from t1;
drop table t1;
#
# test delete using hidden_primary_key
#

View File

@ -2,7 +2,7 @@
# Test of fulltext index
#
drop table if exists t1,t2;
drop table if exists t1,t2,t3;
CREATE TABLE t1 (a VARCHAR(200), b TEXT, FULLTEXT (a,b));
INSERT INTO t1 VALUES('MySQL has now support', 'for full-text search'),('Full-text indexes', 'are called collections'),('Only MyISAM tables','support collections'),('Function MATCH ... AGAINST()','is used to do a search'),('Full-text search in MySQL', 'implements vector space model');
@ -61,4 +61,23 @@ select * from t2 where MATCH inhalt AGAINST (NULL);
select * from t2 where MATCH inhalt AGAINST ('foobar');
select * from t2 having MATCH inhalt AGAINST ('foobar');
drop table t1,t2;
#
# check of fulltext errors
#
CREATE TABLE t3 (
ticket int(11),
inhalt text,
KEY tig (ticket),
fulltext index tix (inhalt)
);
--error 1210
select * from t2 having MATCH inhalt AGAINST (t1.id);
--error 1210
select * from t2 having MATCH ticket AGAINST ('foobar');
--error 1210
select * from t2,t3 having MATCH (t2.inhalt,t3.inhalt) AGAINST ('foobar');
drop table t1,t2,t3;

View File

@ -254,3 +254,41 @@ select * from t1 where a between 0 and 1 order by a desc, b desc;
drop table t1;
CREATE TABLE t1 (
gid int(10) unsigned NOT NULL auto_increment,
cid smallint(5) unsigned NOT NULL default '0',
PRIMARY KEY (gid),
KEY component_id (cid)
) TYPE=MyISAM;
INSERT INTO t1 VALUES (103853,108),(103867,108),(103962,108),(104505,108),(104619,108),(104620,108);
ALTER TABLE t1 add skr int(10) not null;
CREATE TABLE t2 (
gid int(10) unsigned NOT NULL default '0',
uid smallint(5) unsigned NOT NULL default '1',
sid tinyint(3) unsigned NOT NULL default '1',
PRIMARY KEY (gid),
KEY uid (uid),
KEY status_id (sid)
) TYPE=MyISAM;
INSERT INTO t2 VALUES (103853,250,5),(103867,27,5),(103962,27,5),(104505,117,5),(104619,75,5),(104620,15,5);
CREATE TABLE t3 (
uid smallint(6) NOT NULL auto_increment,
PRIMARY KEY (uid)
) TYPE=MyISAM;
INSERT INTO t3 VALUES (1),(15),(27),(75),(117),(250);
ALTER TABLE t3 add skr int(10) not null;
select t1.gid, t2.sid, t3.uid from t2, t1, t3 where t2.gid = t1.gid and t2.uid = t3.uid order by t3.uid, t1.gid;
select t1.gid, t2.sid, t3.uid from t3, t2, t1 where t2.gid = t1.gid and t2.uid = t3.uid order by t3.uid, t1.gid;
# The following ORDER BY can be optimized
EXPLAIN select t1.gid, t2.sid, t3.uid from t3, t2, t1 where t2.gid = t1.gid and t2.uid = t3.uid order by t1.gid, t3.uid;
EXPLAIN SELECT t1.gid, t3.uid from t1, t3 where t1.gid = t3.uid order by t1.gid,t3.skr;
# The following ORDER BY can't be optimized
EXPLAIN SELECT t1.gid, t2.sid, t3.uid from t2, t1, t3 where t2.gid = t1.gid and t2.uid = t3.uid order by t3.uid, t1.gid;
EXPLAIN SELECT t1.gid, t3.uid from t1, t3 where t1.gid = t3.uid order by t3.skr,t1.gid;
EXPLAIN SELECT t1.gid, t3.uid from t1, t3 where t1.skr = t3.uid order by t1.gid,t3.skr;
drop table t1,t2,t3;

View File

@ -222,7 +222,7 @@ static my_bool search_default_file(DYNAMIC_ARRAY *args, MEM_ROOT *alloc,
const char *dir, const char *config_file,
const char *ext, TYPELIB *group)
{
char name[FN_REFLEN+10],buff[FN_REFLEN+1],*ptr,*end,*value,*tmp;
char name[FN_REFLEN+10],buff[4096],*ptr,*end,*value,*tmp;
FILE *fp;
uint line=0;
my_bool read_values=0,found_group=0;

View File

@ -223,18 +223,27 @@ foreach my $rdb ( @db_desc ) {
my $db = $rdb->{src};
eval { $dbh->do( "use $db" ); };
die "Database '$db' not accessible: $@" if ( $@ );
my @dbh_tables = $dbh->func( '_ListTables' );
my @dbh_tables = $dbh->tables();
## generate regex for tables/files
my $t_regex = $rdb->{t_regex}; ## assign temporary regex
my $negated = $t_regex =~ tr/~//d; ## remove and count negation operator: we don't allow ~ in table names
$t_regex = qr/$t_regex/; ## make regex string from user regex
my $t_regex;
my $negated;
if ($rdb->{t_regex}) {
$t_regex = $rdb->{t_regex}; ## assign temporary regex
$negated = $t_regex =~ tr/~//d; ## remove and count
## negation operator: we
## don't allow ~ in table
## names
## filter (out) tables specified in t_regex
print "Filtering tables with '$t_regex'\n" if $opt{debug};
@dbh_tables = ( $negated
? grep { $_ !~ $t_regex } @dbh_tables
: grep { $_ =~ $t_regex } @dbh_tables );
$t_regex = qr/$t_regex/; ## make regex string from
## user regex
## filter (out) tables specified in t_regex
print "Filtering tables with '$t_regex'\n" if $opt{debug};
@dbh_tables = ( $negated
? grep { $_ !~ $t_regex } @dbh_tables
: grep { $_ =~ $t_regex } @dbh_tables );
}
## get list of files to copy
my $db_dir = "$datadir/$db";
@ -249,10 +258,18 @@ foreach my $rdb ( @db_desc ) {
closedir( DBDIR );
## filter (out) files specified in t_regex
my @db_files = ( $negated
? grep { $db_files{$_} !~ $t_regex } keys %db_files
: grep { $db_files{$_} =~ $t_regex } keys %db_files );
my @db_files;
if ($rdb->{t_regex}) {
@db_files = ($negated
? grep { $db_files{$_} !~ $t_regex } keys %db_files
: grep { $db_files{$_} =~ $t_regex } keys %db_files );
}
else {
@db_files = keys %db_files;
}
@db_files = sort @db_files;
my @index_files=();
## remove indices unless we're told to keep them
@ -809,3 +826,7 @@ Ask Bjoern Hansen - Cleanup code to fix a few bugs and enable -w again.
Emil S. Hansen - Added resetslave and resetmaster.
Jeremy D. Zawodny - Removed deprecated DBI calls. Fixed bug which
resulted in nothing being copied when a regexp was specified but no
database name(s).

View File

@ -27,7 +27,6 @@ INCLUDES = @MT_INCLUDES@ \
-I$(srcdir) -I../include -I.. -I. $(openssl_includes)
WRAPLIBS= @WRAPLIBS@
SUBDIRS = share
bin_PROGRAMS = mysqlbinlog
libexec_PROGRAMS = mysqld
noinst_PROGRAMS = gen_lex_hash
gen_lex_hash_LDFLAGS = @NOINST_LDFLAGS@
@ -83,12 +82,9 @@ mysqld_SOURCES = sql_lex.cc sql_handler.cc \
sql_udf.cc sql_analyse.cc sql_analyse.h sql_cache.cc \
slave.cc sql_repl.cc \
mini_client.cc mini_client_errors.c \
md5.c stacktrace.c sql_union.cc
md5.c stacktrace.c
gen_lex_hash_SOURCES = gen_lex_hash.cc
gen_lex_hash_LDADD = $(LDADD) $(CXXLDFLAGS)
mysqlbinlog_SOURCES = mysqlbinlog.cc mini_client.cc net_serv.cc \
mini_client_errors.c password.c
mysqlbinlog_LDADD = $(LDADD) $(CXXLDFLAGS) $(mysqld_LDADD)
DEFS = -DMYSQL_SERVER \
-DDEFAULT_MYSQL_HOME="\"$(MYSQLBASEdir)\"" \

View File

@ -888,7 +888,7 @@ int ha_berkeley::write_row(byte * record)
if (changed_keys & 1)
{
if ((new_error = remove_key(sub_trans, keynr, record,
(DBT*) 0, &prim_key)))
&prim_key)))
break; /* purecov: inspected */
}
}
@ -970,7 +970,7 @@ int ha_berkeley::update_primary_key(DB_TXN *trans, bool primary_key_changed,
{
// Primary key changed or we are updating a key that can have duplicates.
// Delete the old row and add a new one
if (!(error=remove_key(trans, primary_key, old_row, (DBT *) 0, old_key)))
if (!(error=remove_key(trans, primary_key, old_row, old_key)))
{
if (!(error=pack_row(&row, new_row, 0)))
{
@ -1034,7 +1034,7 @@ int ha_berkeley::restore_keys(DB_TXN *trans, key_map changed_keys,
if (changed_keys & 1)
{
if (changed_keys != 1 &&
(error = remove_key(trans, keynr, new_row, (DBT*) 0, new_key)))
(error = remove_key(trans, keynr, new_row, new_key)))
break; /* purecov: inspected */
if ((error = key_file[keynr]->put(key_file[keynr], trans,
create_key(&tmp_key, keynr, key_buff2,
@ -1105,8 +1105,7 @@ int ha_berkeley::update_row(const byte * old_row, byte * new_row)
continue;
if (key_cmp(keynr, old_row, new_row) || primary_key_changed)
{
if ((error=remove_key(sub_trans, keynr, old_row, (DBT*) 0,
&old_prim_key)))
if ((error=remove_key(sub_trans, keynr, old_row, &old_prim_key)))
{
if (using_ignore && /* purecov: inspected */
(thd_options & OPTION_INTERNAL_SUBTRANSACTIONS))
@ -1172,11 +1171,9 @@ int ha_berkeley::update_row(const byte * old_row, byte * new_row)
Delete one key
This uses key_buff2, when keynr != primary key, so it's important that
a function that calls this doesn't use this buffer for anything else.
packed_record may be NULL if the key is unique
*/
int ha_berkeley::remove_key(DB_TXN *trans, uint keynr, const byte *record,
DBT *packed_record,
DBT *prim_key)
{
int error;
@ -1207,13 +1204,9 @@ int ha_berkeley::remove_key(DB_TXN *trans, uint keynr, const byte *record,
if (!(error=key_file[keynr]->cursor(key_file[keynr], trans,
&tmp_cursor, 0)))
{
if (!(error=cursor->c_get(tmp_cursor,
(keynr == primary_key ?
prim_key :
create_key(&key, keynr, key_buff2, record)),
(keynr == primary_key ?
packed_record : prim_key),
DB_GET_BOTH | DB_RMW)))
if (!(error=tmp_cursor->c_get(tmp_cursor,
create_key(&key, keynr, key_buff2, record),
prim_key, DB_GET_BOTH | DB_RMW)))
{ // This shouldn't happen
error=tmp_cursor->c_del(tmp_cursor,0);
}
@ -1236,7 +1229,7 @@ int ha_berkeley::remove_keys(DB_TXN *trans, const byte *record,
{
if (keys & 1)
{
int new_error=remove_key(trans, keynr, record, new_record, prim_key);
int new_error=remove_key(trans, keynr, record, prim_key);
if (new_error)
{
result=new_error; // Return last error /* purecov: inspected */

View File

@ -69,8 +69,7 @@ class ha_berkeley: public handler
int key_length = MAX_KEY_LENGTH);
DBT *pack_key(DBT *key, uint keynr, char *buff, const byte *key_ptr,
uint key_length);
int remove_key(DB_TXN *trans, uint keynr, const byte *record,
DBT *packed_record, DBT *prim_key);
int remove_key(DB_TXN *trans, uint keynr, const byte *record, DBT *prim_key);
int remove_keys(DB_TXN *trans,const byte *record, DBT *new_record,
DBT *prim_key, key_map keys);
int restore_keys(DB_TXN *trans, key_map changed_keys, uint primary_key,

View File

@ -822,11 +822,11 @@ ha_innobase::open(
if (NULL == (ib_table = dict_table_get(norm_name, NULL))) {
fprintf(stderr, "\
Cannot find table %s from the internal data dictionary\n\
of InnoDB though the .frm file for the table exists. Maybe you have deleted\n\
and created again an InnoDB database but forgotten to delete the\n\
corresponding .frm files of old InnoDB tables?\n",
fprintf(stderr,
"Cannot find table %s from the internal data dictionary\n"
"of InnoDB though the .frm file for the table exists. Maybe you have deleted\n"
"and created again an InnoDB database but forgotten to delete the\n"
"corresponding .frm files of old InnoDB tables?\n",
norm_name);
free_share(share);
@ -2659,6 +2659,37 @@ ha_innobase::records_in_range(
DBUG_RETURN((ha_rows) n_rows);
}
/*************************************************************************
Gives an UPPER BOUND to the number of rows in a table. This is used in
filesort.cc and the upper bound must hold. TODO: Since the number of
rows in a table may change after this function is called, we still may
get a 'Sort aborted' error in filesort.cc of MySQL. The ultimate fix is to
improve the algorithm of filesort.cc.

The bound is computed as 1000 + (clustered index size * UNIV_PAGE_SIZE) / 20,
where 20 bytes is the minimum clustered index record size. The function
calls dict_update_statistics() on the table first and, as a side effect,
overwrites data_file_length (not a local of this function; presumably the
handler member - confirm against the class declaration). */

ha_rows
ha_innobase::estimate_number_of_rows(void)
/*======================================*/
			/* out: upper bound of rows, currently 32-bit int
			or uint */
{
	row_prebuilt_t*	prebuilt	= (row_prebuilt_t*) innobase_prebuilt;
	dict_table_t*	ib_table;

	/* Tag the debug trace frame with this function's own name */
	DBUG_ENTER("estimate_number_of_rows");

	ib_table = prebuilt->table;

	dict_update_statistics(ib_table);

	data_file_length = ((ulonglong)
				ib_table->stat_clustered_index_size)
					* UNIV_PAGE_SIZE;

	/* The minimum clustered index record size is 20 bytes */

	/* Leave through DBUG_RETURN so that the frame opened by DBUG_ENTER
	is closed again; a plain return would leave the DBUG call stack
	unbalanced. */
	DBUG_RETURN((ha_rows) (1000 + data_file_length / 20));
}
/*************************************************************************
How many seeks it will take to read through the table. This is to be
comparable to the number returned by records_in_range so that we can

View File

@ -137,6 +137,7 @@ class ha_innobase: public handler
enum ha_rkey_function start_search_flag,
const byte *end_key,uint end_key_len,
enum ha_rkey_function end_search_flag);
ha_rows estimate_number_of_rows();
int create(const char *name, register TABLE *form,
HA_CREATE_INFO *create_info);

View File

@ -35,7 +35,7 @@ ulong myisam_recover_options= HA_RECOVER_NONE;
/* bits in myisam_recover_options */
const char *myisam_recover_names[] =
{ "DEFAULT", "BACKUP", "FORCE", "QUICK"};
{ "DEFAULT", "BACKUP", "FORCE", "QUICK", NullS};
/*
  Type library describing the myisam_recover_names list. The count is the
  number of real names: the terminating NullS entry of the array is not
  counted, hence the -1.
*/
TYPELIB myisam_recover_typelib= {array_elements(myisam_recover_names)-1,"",
				 myisam_recover_names};

View File

@ -1954,13 +1954,17 @@ bool Item_func_match::fix_fields(THD *thd,struct st_table_list *tlist)
maybe_null=1;
join_key=0;
/* Why testing for const_item ? Monty */
/* I'll remove it later, but this should include modifications to
find_best and auto_close as complement to auto_init code above. SerG */
/* I'd rather say now that const_item is assumed in quite a bit of
places, so it would be difficult to remove. SerG */
/* Serg:
I'd rather say now that const_item is assumed in quite a few
places, so it would be difficult to remove. If it were ever to be
removed, that would require modifications to find_best and auto_close
as a complement to the auto_init code above.
*/
if (Item_func::fix_fields(thd,tlist) || !const_item())
{
my_error(ER_WRONG_ARGUMENTS,MYF(0),"AGAINST");
return 1;
}
while ((item=li++))
{
@ -1969,12 +1973,18 @@ bool Item_func_match::fix_fields(THD *thd,struct st_table_list *tlist)
if (item->type() == Item::REF_ITEM)
li.replace(item= *((Item_ref *)item)->ref);
if (item->type() != Item::FIELD_ITEM || !item->used_tables())
{
my_error(ER_WRONG_ARGUMENTS,MYF(0),"MATCH");
return 1;
}
used_tables_cache|=item->used_tables();
}
/* check that all columns come from the same table */
if (count_bits(used_tables_cache) != 1)
{
my_error(ER_WRONG_ARGUMENTS,MYF(0),"MATCH");
return 1;
}
const_item_cache=0;
table=((Item_field *)fields.head())->field->table;
return 0;

View File

@ -404,15 +404,26 @@ int Log_event::read_log_event(IO_CACHE* file, String* packet,
#endif // MYSQL_CLIENT
// allocates memory - the caller is responsible for clean-up
/*
  UNLOCK_MUTEX releases log_lock when one was supplied (log_lock may be 0).
  In MYSQL_CLIENT builds there is no mutex and the macro does nothing.
  Both variants expand to exactly one statement without a trailing
  semicolon, so "UNLOCK_MUTEX;" is safe inside an unbraced if/else.
*/
#ifndef MYSQL_CLIENT
#define UNLOCK_MUTEX do { if (log_lock) pthread_mutex_unlock(log_lock); } while (0)
#else
#define UNLOCK_MUTEX do { } while (0)
#endif
// allocates memory - the caller is responsible for clean-up
#ifndef MYSQL_CLIENT
Log_event* Log_event::read_log_event(IO_CACHE* file, pthread_mutex_t* log_lock)
#else
Log_event* Log_event::read_log_event(IO_CACHE* file)
#endif
{
char head[LOG_EVENT_HEADER_LEN];
if(log_lock) pthread_mutex_lock(log_lock);
#ifndef MYSQL_CLIENT
if(log_lock) pthread_mutex_lock(log_lock);
#endif
if (my_b_read(file, (byte *) head, sizeof(head)))
{
if (log_lock) pthread_mutex_unlock(log_lock);
UNLOCK_MUTEX;
return 0;
}
@ -449,7 +460,7 @@ Log_event* Log_event::read_log_event(IO_CACHE* file, pthread_mutex_t* log_lock)
if((res = read_log_event(buf, data_len)))
res->register_temp_buf(buf);
err:
if (log_lock) pthread_mutex_unlock(log_lock);
UNLOCK_MUTEX;
if(error)
{
sql_print_error(error);

View File

@ -233,10 +233,13 @@ public:
virtual void print(FILE* file, bool short_form = 0, char* last_db = 0) = 0;
void print_timestamp(FILE* file, time_t *ts = 0);
void print_header(FILE* file);
#endif
#ifndef MYSQL_CLIENT
// if mutex is 0, the read will proceed without mutex
static Log_event* read_log_event(IO_CACHE* file, pthread_mutex_t* log_lock);
#else // avoid having to link mysqlbinlog against libpthread
static Log_event* read_log_event(IO_CACHE* file);
#endif
static Log_event* read_log_event(const char* buf, int event_len);
const char* get_type_str();

View File

@ -177,6 +177,13 @@ char* query_table_status(THD *thd,const char *db,const char *table_name);
#define SELECT_NO_UNLOCK (QUERY_NO_GOOD_INDEX_USED*2)
#define TMP_TABLE_ALL_COLUMNS (SELECT_NO_UNLOCK*2)
/*
  Bit flags for the SQL mode mask (opt_sql_mode and THD::sql_mode).
  Every value is a distinct power of two so the modes can be OR-ed together.
  NOTE(review): the bit order appears to mirror the order of
  sql_mode_names[] in mysqld.cc - confirm against the --sql-mode parser
  before inserting or reordering flags.
*/
#define MODE_REAL_AS_FLOAT 1
#define MODE_PIPES_AS_CONCAT 2
#define MODE_ANSI_QUOTES 4
#define MODE_IGNORE_SPACE 8
#define MODE_SERIALIZABLE 16
#define MODE_ONLY_FULL_GROUP_BY 32
#define RAID_BLOCK_SIZE 1024
/* BINLOG_DUMP options */

View File

@ -220,7 +220,7 @@ static char mysql_home[FN_REFLEN],pidfile_name[FN_REFLEN];
static pthread_t select_thread;
static bool opt_log,opt_update_log,opt_bin_log,opt_slow_log,opt_noacl,
opt_disable_networking=0, opt_bootstrap=0,opt_skip_show_db=0,
opt_ansi_mode=0,opt_myisam_log=0,
opt_myisam_log=0,
opt_large_files=sizeof(my_off_t) > 4;
bool opt_sql_bin_update = 0, opt_log_slave_updates = 0, opt_safe_show_db=0,
opt_show_slave_auth_info = 0, opt_old_rpl_compat = 0;
@ -320,6 +320,7 @@ char server_version[SERVER_VERSION_LENGTH]=MYSQL_SERVER_VERSION;
const char *first_keyword="first";
const char **errmesg; /* Error messages */
const char *myisam_recover_options_str="OFF";
const char *sql_mode_str="OFF";
const char *default_tx_isolation_name;
enum_tx_isolation default_tx_isolation=ISO_READ_COMMITTED;
@ -333,6 +334,12 @@ double log_10[32]; /* 10 potences */
I_List<THD> threads,thread_cache;
time_t start_time;
/* Server-wide SQL mode bit mask (MODE_* flags); copied into each new THD */
ulong opt_sql_mode = 0L;
/*
  Option names listed for --sql-mode in the usage text.
  The list is terminated by NullS.
*/
const char *sql_mode_names[] =
{ "REAL_AS_FLOAT", "PIPES_AS_CONCAT", "ANSI_QUOTES", "IGNORE_SPACE",
  "SERIALIZE","ONLY_FULL_GROUP_BY", NullS };
/*
  The TYPELIB count is the number of real names; the NullS terminator must
  not be counted, hence the -1.
*/
TYPELIB sql_mode_typelib= {array_elements(sql_mode_names)-1,"",
			   sql_mode_names};
MY_BITMAP temp_pool;
bool use_temp_pool=0;
@ -1525,7 +1532,7 @@ static void open_log(MYSQL_LOG *log, const char *hostname,
// get rid of the extension if the log is binary to avoid problems
if (type == LOG_BIN)
{
char* p = strrchr(opt_name, FN_EXTCHAR);
char* p = strrchr((char*) opt_name, FN_EXTCHAR);
if (p)
*p = 0;
}
@ -2520,6 +2527,7 @@ enum options {
OPT_REPORT_USER, OPT_REPORT_PASSWORD, OPT_REPORT_PORT,
OPT_MAX_BINLOG_DUMP_EVENTS, OPT_SPORADIC_BINLOG_DUMP_FAIL,
OPT_SHOW_SLAVE_AUTH_INFO, OPT_OLD_RPL_COMPAT,
OPT_SQL_MODE,
OPT_SLAVE_LOAD_TMPDIR};
static struct option long_options[] = {
@ -2664,6 +2672,7 @@ static struct option long_options[] = {
{"skip-thread-priority", no_argument, 0, (int) OPT_SKIP_PRIOR},
{"slave-load-tmpdir", required_argument, 0, (int) OPT_SLAVE_LOAD_TMPDIR},
{"sql-bin-update-same", no_argument, 0, (int) OPT_SQL_BIN_UPDATE_SAME},
{"sql-mode", required_argument, 0, (int) OPT_SQL_MODE},
#include "sslopt-longopts.h"
#ifdef __WIN__
{"standalone", no_argument, 0, (int) OPT_STANDALONE},
@ -2832,7 +2841,6 @@ CHANGEABLE_VAR changeable_vars[] = {
struct show_var_st init_vars[]= {
{"ansi_mode", (char*) &opt_ansi_mode, SHOW_BOOL},
{"back_log", (char*) &back_log, SHOW_LONG},
{"basedir", mysql_home, SHOW_CHAR},
#ifdef HAVE_BERKELEY_DB
@ -2939,6 +2947,7 @@ struct show_var_st init_vars[]= {
{"slow_launch_time", (char*) &slow_launch_time, SHOW_LONG},
{"socket", (char*) &mysql_unix_port, SHOW_CHAR_PTR},
{"sort_buffer", (char*) &sortbuff_size, SHOW_LONG},
{"sql_mode", (char*) &sql_mode_str, SHOW_CHAR_PTR},
{"table_cache", (char*) &table_cache_size, SHOW_LONG},
{"table_type", (char*) &default_table_type_name, SHOW_CHAR_PTR},
{"thread_cache_size", (char*) &thread_cache_size, SHOW_LONG},
@ -3122,6 +3131,9 @@ static void usage(void)
Don't give threads different priorities.\n\
--socket=... Socket file to use for connection\n\
-t, --tmpdir=path Path for temporary files\n\
--sql-mode=option[,option[,option...]] where option can be one of:\n\
REAL_AS_FLOAT, PIPES_AS_CONCAT, ANSI_QUOTES,\n\
IGNORE_SPACE, SERIALIZE, ONLY_FULL_GROUP_BY.\n\
--transaction-isolation\n\
Default transaction isolation level\n\
--temp-pool Use a pool of temporary files\n\
@ -3277,8 +3289,9 @@ static void get_options(int argc,char **argv)
opt_warnings=1;
break;
case 'a':
opt_ansi_mode=1;
thd_startup_options|=OPTION_ANSI_MODE;
opt_sql_mode = (MODE_REAL_AS_FLOAT | MODE_PIPES_AS_CONCAT |
MODE_ANSI_QUOTES | MODE_IGNORE_SPACE | MODE_SERIALIZABLE
| MODE_ONLY_FULL_GROUP_BY);
default_tx_isolation= ISO_SERIALIZABLE;
break;
case 'b':

View File

@ -125,6 +125,7 @@ THD::THD():user_time(0),fatal_error(0),last_insert_id_used(0),
server_status=SERVER_STATUS_AUTOCOMMIT;
update_lock_default= low_priority_updates ? TL_WRITE_LOW_PRIORITY : TL_WRITE;
options=thd_startup_options;
sql_mode=(uint) opt_sql_mode;
inactive_timeout=net_wait_timeout;
open_options=ha_open_options;
tx_isolation=session_tx_isolation=default_tx_isolation;

View File

@ -240,7 +240,7 @@ public:
char *query,*thread_stack;
char *host,*user,*priv_user,*db,*ip;
const char *proc_info;
uint client_capabilities,max_packet_length;
uint client_capabilities,sql_mode,max_packet_length;
uint master_access,db_access;
TABLE *open_tables,*temporary_tables, *handler_tables;
MYSQL_LOCK *lock,*locked_tables;

View File

@ -121,7 +121,7 @@ void lex_init(void)
state_map[(uchar)'*']= (uchar) STATE_END_LONG_COMMENT;
state_map[(uchar)'@']= (uchar) STATE_USER_END;
state_map[(uchar) '`']= (uchar) STATE_USER_VARIABLE_DELIMITER;
if (thd_startup_options & OPTION_ANSI_MODE)
if (opt_sql_mode & MODE_ANSI_QUOTES)
{
state_map[(uchar) '"'] = STATE_USER_VARIABLE_DELIMITER;
}
@ -149,7 +149,7 @@ LEX *lex_start(THD *thd, uchar *buf,uint length)
lex->select->ftfunc_list.empty();
lex->convert_set=(lex->thd=thd)->convert_set;
lex->yacc_yyss=lex->yacc_yyvs=0;
lex->ignore_space=test(thd->client_capabilities & CLIENT_IGNORE_SPACE);
lex->ignore_space=test(thd->sql_mode & MODE_IGNORE_SPACE);
return lex;
}

View File

@ -417,6 +417,8 @@ check_connections(THD *thd)
return(ER_OUT_OF_RESOURCES);
thd->client_capabilities=uint2korr(net->read_pos);
if (thd->client_capabilities & CLIENT_IGNORE_SPACE)
thd->sql_mode|= MODE_IGNORE_SPACE;
#ifdef HAVE_OPENSSL
DBUG_PRINT("info",
("pkt_len:%d, client capabilities: %d",
@ -541,8 +543,6 @@ pthread_handler_decl(handle_one_connection,arg)
thd->options |= OPTION_BIG_SELECTS;
if (thd->client_capabilities & CLIENT_COMPRESS)
net->compress=1; // Use compression
if (thd->options & OPTION_ANSI_MODE)
thd->client_capabilities|=CLIENT_IGNORE_SPACE;
thd->proc_info=0; // Remove 'login'
thd->command=COM_SLEEP;

Some files were not shown because too many files have changed in this diff Show More