From acc83c2ba19b93acc227579e09307f3556474bd8 Mon Sep 17 00:00:00 2001
From: Timothy Smith <timothy.smith@sun.com>
Date: Mon, 2 Mar 2009 17:24:23 -0700
Subject: [PATCH 1/3] Applying InnoDB snapshot 5.0-ss4007, part 1.  Fixes

Bug #39939: DROP TABLE/DISCARD TABLESPACE takes long time in buf_LRU_invalidate_tablespace()

This was already fixed in 5.1+; this is a backport to 5.0.

Detailed revision comments:

r2743 | inaam | 2008-10-08 22:18:12 +0300 (Wed, 08 Oct 2008) | 13 lines
branches/5.0:

Backport of r2742 from branches/5.1:

Fix Bug#39939 DROP TABLE/DISCARD TABLESPACE takes long time in
buf_LRU_invalidate_tablespace()

Improve implementation of buf_LRU_invalidate_tablespace by attempting
hash index drop in batches instead of doing it one by one.

Reviewed by: Heikki, Sunny, Marko
Approved by: Heikki
---
 innobase/buf/buf0lru.c | 127 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 127 insertions(+)

diff --git a/innobase/buf/buf0lru.c b/innobase/buf/buf0lru.c
index 0f632f0752a..6984c196701 100644
--- a/innobase/buf/buf0lru.c
+++ b/innobase/buf/buf0lru.c
@@ -42,6 +42,11 @@ initial segment in buf_LRU_get_recent_limit */
 
 #define BUF_LRU_INITIAL_RATIO	8
 
+/* When dropping the search hash index entries before deleting an ibd
+file, we build a local array of pages belonging to that tablespace
+in the buffer pool. Following is the size of that array. */
+#define BUF_LRU_DROP_SEARCH_HASH_SIZE	1024
+
 /* If we switch on the InnoDB monitor because there are too few available
 frames in the buffer pool, we set this to TRUE */
 ibool	buf_lru_switched_on_innodb_mon	= FALSE;
@@ -65,6 +70,120 @@ buf_LRU_block_free_hashed_page(
 	buf_block_t*	block);	/* in: block, must contain a file page and
 				be in a state where it can be freed */
 
+/**********************************************************************
+Attempts to drop page hash index on a batch of pages belonging to a
+particular space id. */
+static
+void
+buf_LRU_drop_page_hash_batch(
+/*=========================*/
+	ulint		id,	/* in: space id */
+	const ulint*	arr,	/* in: array of page_no */
+	ulint		count)	/* in: number of entries in array */
+{
+	ulint	i;
+
+	ut_ad(arr != NULL);
+	ut_ad(count <= BUF_LRU_DROP_SEARCH_HASH_SIZE);
+
+	for (i = 0; i < count; ++i) {
+		btr_search_drop_page_hash_when_freed(id, arr[i]);
+	}
+}
+
+/**********************************************************************
+When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page
+hash index entries belonging to that table. This function tries to
+do that in batch. Note that this is a 'best effort' attempt and does
+not guarantee that ALL hash entries will be removed. */
+static
+void
+buf_LRU_drop_page_hash_for_tablespace(
+/*==================================*/
+	ulint	id)	/* in: space id */
+{
+	buf_block_t*	block;
+	ulint*		page_arr;
+	ulint		num_entries;
+
+	page_arr = ut_malloc(sizeof(ulint)
+			     * BUF_LRU_DROP_SEARCH_HASH_SIZE);
+	mutex_enter(&buf_pool->mutex);
+
+scan_again:
+	num_entries = 0;
+	block = UT_LIST_GET_LAST(buf_pool->LRU);
+
+	while (block != NULL) {
+		buf_block_t*	prev_block;
+
+		mutex_enter(&block->mutex);
+		prev_block = UT_LIST_GET_PREV(LRU, block);
+
+		ut_a(block->state == BUF_BLOCK_FILE_PAGE);
+
+		if (block->space != id
+		    || block->buf_fix_count > 0
+		    || block->io_fix != 0) {
+			/* We leave the fixed pages as is in this scan.
+			To be dealt with later in the final scan. */
+			mutex_exit(&block->mutex);
+			goto next_page;
+		}
+
+		ut_ad(block->space == id);
+		if (block->is_hashed) {
+
+			/* Store the offset(i.e.: page_no) in the array
+			so that we can drop hash index in a batch
+			later. */
+			page_arr[num_entries] = block->offset;
+			mutex_exit(&block->mutex);
+			ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE);
+			++num_entries;
+
+			if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) {
+				goto next_page;
+			}
+			/* Array full. We release the buf_pool->mutex to
+			obey the latching order. */
+			mutex_exit(&buf_pool->mutex);
+
+			buf_LRU_drop_page_hash_batch(id, page_arr,
+						     num_entries);
+			num_entries = 0;
+			mutex_enter(&buf_pool->mutex);
+		} else {
+			mutex_exit(&block->mutex);
+		}
+
+next_page:
+		/* Note that we may have released the buf_pool->mutex
+		above after reading the prev_block during processing
+		of a page_hash_batch (i.e.: when the array was full).
+		This means that prev_block can change in LRU list.
+		This is OK because this function is a 'best effort'
+		to drop as many search hash entries as possible and
+		it does not guarantee that ALL such entries will be
+		dropped. */
+		block = prev_block;
+
+		/* If, however, block has been removed from LRU list
+		to the free list then we should restart the scan.
+		block->state is protected by buf_pool->mutex. */
+		if (block && block->state != BUF_BLOCK_FILE_PAGE) {
+			ut_a(num_entries == 0);
+			goto scan_again;
+		}
+	}
+
+	mutex_exit(&buf_pool->mutex);
+
+	/* Drop any remaining batch of search hashed pages. */
+	buf_LRU_drop_page_hash_batch(id, page_arr, num_entries);
+	ut_free(page_arr);
+}
+
 /**********************************************************************
 Invalidates all pages belonging to a given tablespace when we are deleting
 the data file(s) of that tablespace. */
@@ -78,6 +197,14 @@ buf_LRU_invalidate_tablespace(
 	ulint		page_no;
 	ibool		all_freed;
 
+	/* Before we attempt to drop pages one by one we first
+	attempt to drop page hash index entries in batches to make
+	it more efficient. The batching attempt is a best effort
+	attempt and does not guarantee that all page hash entries
+	will be dropped. We get rid of remaining page hash entries
+	one by one below. */
+	buf_LRU_drop_page_hash_for_tablespace(id);
+
 scan_again:
 	mutex_enter(&(buf_pool->mutex));
 	

From fd5642b67d687a6f4aedd7e7185dab82cb12f007 Mon Sep 17 00:00:00 2001
From: Timothy Smith <timothy.smith@sun.com>
Date: Mon, 2 Mar 2009 17:57:09 -0700
Subject: [PATCH 2/3] Applying InnoDB snapshot 5.0-ss4007, part 2.  Fixes

Bug #18828: If InnoDB runs out of undo slots, it returns misleading 'table is full'

This is a backport of code already in 5.1+.  The error message change referred
to in the detailed revision comments is still pending.

Detailed revision comments:

r3937 | calvin | 2009-01-15 03:11:56 +0200 (Thu, 15 Jan 2009) | 17 lines
branches/5.0:

Backport the fix for Bug#18828. Return DB_TOO_MANY_CONCURRENT_TRXS
when we run out of UNDO slots in the rollback segment. The backport
is requested by MySQL under bug#41529 - Safe handling of InnoDB running
out of undo log slots.

This is a partial fix since the MySQL error code requested to properly
report the error condition back to the client has not yet materialized.
Currently we have #ifdef'd the error code translation in ha_innodb.cc.
This will have to be changed as and when MySQL adds the new requested
code or an equivalent code that we can then use.

Given the above, currently we will get the old behavior, not the
"fixed" and intended behavior.

Approved by:	Heikki (on IM)
---
 innobase/dict/dict0crea.c   |   3 +-
 innobase/include/db0err.h   |   5 ++
 innobase/include/trx0undo.h |  13 +++--
 innobase/row/row0mysql.c    |   3 +-
 innobase/trx/trx0rec.c      |  15 +++---
 innobase/trx/trx0undo.c     | 105 ++++++++++++++++++++++--------------
 sql/ha_innodb.cc            |  14 +++++
 7 files changed, 103 insertions(+), 55 deletions(-)

diff --git a/innobase/dict/dict0crea.c b/innobase/dict/dict0crea.c
index e20d8b6e83a..12d99734796 100644
--- a/innobase/dict/dict0crea.c
+++ b/innobase/dict/dict0crea.c
@@ -1249,7 +1249,8 @@ dict_create_or_check_foreign_constraint_tables(void)
 		fprintf(stderr, "InnoDB: error %lu in creation\n",
 			(ulong) error);
 		
-		ut_a(error == DB_OUT_OF_FILE_SPACE);
+		ut_a(error == DB_OUT_OF_FILE_SPACE
+		     || error == DB_TOO_MANY_CONCURRENT_TRXS);
 
 		fprintf(stderr, "InnoDB: creation failed\n");
 		fprintf(stderr, "InnoDB: tablespace is full\n");
diff --git a/innobase/include/db0err.h b/innobase/include/db0err.h
index 247c5de67db..68bdcdc8b7f 100644
--- a/innobase/include/db0err.h
+++ b/innobase/include/db0err.h
@@ -70,6 +70,11 @@ Created 5/24/1996 Heikki Tuuri
 					work with e.g., FT indexes created by
 					a later version of the engine. */
 
+#define DB_TOO_MANY_CONCURRENT_TRXS 47	/* when InnoDB runs out of the
+					preconfigured undo slots, this can
+					only happen when there are too many
+					concurrent transactions */
+
 /* The following are partial failure codes */
 #define DB_FAIL 		1000
 #define DB_OVERFLOW 		1001
diff --git a/innobase/include/trx0undo.h b/innobase/include/trx0undo.h
index 4f1847aa88c..152a09c0f76 100644
--- a/innobase/include/trx0undo.h
+++ b/innobase/include/trx0undo.h
@@ -222,13 +222,16 @@ trx_undo_lists_init(
 Assigns an undo log for a transaction. A new undo log is created or a cached
 undo log reused. */
 
-trx_undo_t*
+ulint
 trx_undo_assign_undo(
 /*=================*/
-			/* out: the undo log, NULL if did not succeed: out of
-			space */
-	trx_t*	trx,	/* in: transaction */
-	ulint	type);	/* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+				/* out: DB_SUCCESS if undo log assign
+				 * successful, possible error codes are:
+				 * DB_TOO_MANY_CONCURRENT_TRXS
+				 * DB_OUT_OF_FILE_SPACE
+				 * DB_OUT_OF_MEMORY */
+	trx_t*		trx,	/* in: transaction */
+	ulint		type);	/* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
 /**********************************************************************
 Sets the state of the undo log segment at a transaction finish. */
 
diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c
index 4bc5f39359c..d7213b25145 100644
--- a/innobase/row/row0mysql.c
+++ b/innobase/row/row0mysql.c
@@ -494,7 +494,8 @@ handle_new_error:
 		/* MySQL will roll back the latest SQL statement */
 	} else if (err == DB_ROW_IS_REFERENCED
 		   || err == DB_NO_REFERENCED_ROW
-		   || err == DB_CANNOT_ADD_CONSTRAINT) {
+		   || err == DB_CANNOT_ADD_CONSTRAINT
+		   || err == DB_TOO_MANY_CONCURRENT_TRXS) {
            	if (savept) {
 			/* Roll back the latest, possibly incomplete
 			insertion or update */
diff --git a/innobase/trx/trx0rec.c b/innobase/trx/trx0rec.c
index 3b7171e6038..44b734625dd 100644
--- a/innobase/trx/trx0rec.c
+++ b/innobase/trx/trx0rec.c
@@ -1013,6 +1013,7 @@ trx_undo_report_row_operation(
 	ibool		is_insert;
 	trx_rseg_t*	rseg;
 	mtr_t		mtr;
+	ulint		err		= DB_SUCCESS;
 	mem_heap_t*	heap		= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets		= offsets_;
@@ -1024,7 +1025,7 @@ trx_undo_report_row_operation(
 
 		*roll_ptr = ut_dulint_zero;
 
-		return(DB_SUCCESS);
+		return(err);
 	}
 		
 	ut_ad(thr);
@@ -1042,7 +1043,7 @@ trx_undo_report_row_operation(
 
 		if (trx->insert_undo == NULL) {
 
-			trx_undo_assign_undo(trx, TRX_UNDO_INSERT);
+			err = trx_undo_assign_undo(trx, TRX_UNDO_INSERT);
 		}
 
 		undo = trx->insert_undo;
@@ -1052,7 +1053,7 @@ trx_undo_report_row_operation(
 
 		if (trx->update_undo == NULL) {
 
-			trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
+			err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
 
 		}
 
@@ -1060,11 +1061,11 @@ trx_undo_report_row_operation(
 		is_insert = FALSE;
 	}
 
-	if (undo == NULL) {
-		/* Did not succeed: out of space */
+	if (err != DB_SUCCESS) {
+		/* Did not succeed: return the error encountered */
 		mutex_exit(&(trx->undo_mutex));
 
-		return(DB_OUT_OF_FILE_SPACE);
+		return(err);
 	}
 
 	page_no = undo->last_page_no;
@@ -1154,7 +1155,7 @@ trx_undo_report_row_operation(
 	if (UNIV_LIKELY_NULL(heap)) {
 		mem_heap_free(heap);
 	}
-	return(DB_SUCCESS);
+	return(err);
 }
 
 /*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
diff --git a/innobase/trx/trx0undo.c b/innobase/trx/trx0undo.c
index 251cd355897..997f25a66d8 100644
--- a/innobase/trx/trx0undo.c
+++ b/innobase/trx/trx0undo.c
@@ -374,27 +374,32 @@ trx_undo_page_init(
 /*******************************************************************
 Creates a new undo log segment in file. */
 static
-page_t*
+ulint
 trx_undo_seg_create(
 /*================*/
-				/* out: segment header page x-latched, NULL
-				if no space left */
+				/* out: DB_SUCCESS if page creation OK
+				possible error codes are:
+				DB_TOO_MANY_CONCURRENT_TRXS
+				DB_OUT_OF_FILE_SPACE */
 	trx_rseg_t*	rseg __attribute__((unused)),/* in: rollback segment */
 	trx_rsegf_t*	rseg_hdr,/* in: rollback segment header, page
 				x-latched */
 	ulint		type,	/* in: type of the segment: TRX_UNDO_INSERT or
 				TRX_UNDO_UPDATE */
 	ulint*		id,	/* out: slot index within rseg header */
+	page_t**	undo_page,
+				/* out: segment header page x-latched; valid
+				only when DB_SUCCESS is returned */
 	mtr_t*		mtr)	/* in: mtr */
 {
 	ulint		slot_no;
 	ulint		space;
-	page_t* 	undo_page;
 	trx_upagef_t*	page_hdr;
 	trx_usegf_t*	seg_hdr;
 	ulint		n_reserved;
 	ibool		success;
-	
+	ulint		err = DB_SUCCESS;
+
 	ut_ad(mtr && id && rseg_hdr);
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(mutex_own(&(rseg->mutex)));
@@ -411,7 +416,7 @@ trx_undo_seg_create(
 "InnoDB: Warning: cannot find a free slot for an undo log. Do you have too\n"
 "InnoDB: many active transactions running concurrently?\n");
 
-		return(NULL);
+		return(DB_TOO_MANY_CONCURRENT_TRXS);
 	}
 
 	space = buf_frame_get_space_id(rseg_hdr);
@@ -420,29 +425,29 @@ trx_undo_seg_create(
 									mtr);
 	if (!success) {
 
-		return(NULL);
+		return(DB_OUT_OF_FILE_SPACE);
 	}
 
 	/* Allocate a new file segment for the undo log */
-	undo_page = fseg_create_general(space, 0,
+	*undo_page = fseg_create_general(space, 0,
 			TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER, TRUE, mtr);
 
 	fil_space_release_free_extents(space, n_reserved);
 			
-	if (undo_page == NULL) {
+	if (*undo_page == NULL) {
 		/* No space left */
 
-		return(NULL);
+		return(DB_OUT_OF_FILE_SPACE);
 	}
 
 #ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(undo_page, SYNC_TRX_UNDO_PAGE);
+	buf_page_dbg_add_level(*undo_page, SYNC_TRX_UNDO_PAGE);
 #endif /* UNIV_SYNC_DEBUG */
 
-	page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
-	seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+	page_hdr = *undo_page + TRX_UNDO_PAGE_HDR;
+	seg_hdr = *undo_page + TRX_UNDO_SEG_HDR;
 
-	trx_undo_page_init(undo_page, type, mtr);
+	trx_undo_page_init(*undo_page, type, mtr);
 
 	mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_FREE,
 				TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE,
@@ -456,10 +461,11 @@ trx_undo_seg_create(
 					page_hdr + TRX_UNDO_PAGE_NODE, mtr);
 
 	trx_rsegf_set_nth_undo(rseg_hdr, slot_no,
-				buf_frame_get_page_no(undo_page), mtr);
+				buf_frame_get_page_no(*undo_page), mtr);
+
 	*id = slot_no;
 	
-	return(undo_page);
+	return(err);
 }
 
 /**************************************************************************
@@ -1400,6 +1406,11 @@ trx_undo_mem_create(
 
 	undo = mem_alloc(sizeof(trx_undo_t));
 
+	if (undo == NULL) {
+
+		return NULL;
+	}
+
 	undo->id = id;
 	undo->type = type;
 	undo->state = TRX_UNDO_ACTIVE;
@@ -1479,11 +1490,15 @@ trx_undo_mem_free(
 /**************************************************************************
 Creates a new undo log. */
 static
-trx_undo_t*
+ulint
 trx_undo_create(
 /*============*/
-				/* out: undo log object, NULL if did not
-				succeed: out of space */
+				/* out: DB_SUCCESS if successful in creating
+				the new undo log object, possible error
+				codes are: 
+				DB_TOO_MANY_CONCURRENT_TRXS
+				DB_OUT_OF_FILE_SPACE 
+				DB_OUT_OF_MEMORY*/
 	trx_t*		trx,	/* in: transaction */
 	trx_rseg_t*	rseg,	/* in: rollback segment memory copy */
 	ulint		type,	/* in: type of the log: TRX_UNDO_INSERT or
@@ -1491,36 +1506,39 @@ trx_undo_create(
 	dulint		trx_id,	/* in: id of the trx for which the undo log
 				is created */ 
 	XID*		xid,	/* in: X/Open transaction identification*/
+	trx_undo_t**	undo,	/* out: the new undo log object, undefined
+				 * if did not succeed */
 	mtr_t*		mtr)	/* in: mtr */
 {
 	trx_rsegf_t*	rseg_header;
 	ulint		page_no;
 	ulint		offset;
 	ulint		id;
-	trx_undo_t*	undo;
 	page_t*		undo_page;
-	
+	ulint		err;
+
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(mutex_own(&(rseg->mutex)));
 #endif /* UNIV_SYNC_DEBUG */
 
 	if (rseg->curr_size == rseg->max_size) {
 
-		return(NULL);
+		return(DB_OUT_OF_FILE_SPACE);
 	}
 
 	rseg->curr_size++;
 
 	rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr);
 
-	undo_page = trx_undo_seg_create(rseg, rseg_header, type, &id, mtr);
+	err = trx_undo_seg_create(rseg, rseg_header, type, &id,
+							&undo_page, mtr);
 	
-	if (undo_page == NULL) {
+	if (err != DB_SUCCESS) {
 		/* Did not succeed */
 
 		rseg->curr_size--;
 
-		return(NULL);
+		return(err);
 	}
 
 	page_no = buf_frame_get_page_no(undo_page);
@@ -1532,9 +1550,14 @@ trx_undo_create(
 					undo_page + offset, mtr);
 	}
 
-	undo = trx_undo_mem_create(rseg, id, type, trx_id, xid,
+	*undo = trx_undo_mem_create(rseg, id, type, trx_id, xid,
 							page_no, offset);
-	return(undo);
+	if (*undo == NULL) {
+
+		err = DB_OUT_OF_MEMORY;
+	}
+
+	return(err);
 }
 
 /*================ UNDO LOG ASSIGNMENT AND CLEANUP =====================*/
@@ -1653,17 +1676,20 @@ trx_undo_mark_as_dict_operation(
 Assigns an undo log for a transaction. A new undo log is created or a cached
 undo log reused. */
 
-trx_undo_t*
+ulint
 trx_undo_assign_undo(
 /*=================*/
-			/* out: the undo log, NULL if did not succeed: out of
-			space */
-	trx_t*	trx,	/* in: transaction */
-	ulint	type)	/* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+				/* out: DB_SUCCESS if undo log assign
+				successful, possible error codes are:
+				DB_TOO_MANY_CONCURRENT_TRXS
+				DB_OUT_OF_FILE_SPACE DB_OUT_OF_MEMORY*/
+	trx_t*		trx,	/* in: transaction */
+	ulint		type)	/* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
 {
 	trx_rseg_t*	rseg;
 	trx_undo_t*	undo;
 	mtr_t		mtr;
+	ulint		err = DB_SUCCESS;
 
 	ut_ad(trx);
 	ut_ad(trx->rseg);
@@ -1684,15 +1710,11 @@ trx_undo_assign_undo(
 	undo = trx_undo_reuse_cached(trx, rseg, type, trx->id, &trx->xid,
 									&mtr);
 	if (undo == NULL) {
-		undo = trx_undo_create(trx, rseg, type, trx->id, &trx->xid,
-									&mtr);
-		if (undo == NULL) {
-			/* Did not succeed */
+		err = trx_undo_create(trx, rseg, type, trx->id, &trx->xid,
+								&undo, &mtr);
+		if (err != DB_SUCCESS) {
 
-			mutex_exit(&(rseg->mutex));
-			mtr_commit(&mtr);
-
-			return(NULL);
+			goto func_exit;
 		}
 	}
 
@@ -1710,10 +1732,11 @@ trx_undo_assign_undo(
 		trx_undo_mark_as_dict_operation(trx, undo, &mtr);
 	}
 
+func_exit:
 	mutex_exit(&(rseg->mutex));
 	mtr_commit(&mtr);
 
-	return(undo);
+	return err;
 }
 
 /**********************************************************************
diff --git a/sql/ha_innodb.cc b/sql/ha_innodb.cc
index e0b7fb6e7f5..3bae0da3e02 100644
--- a/sql/ha_innodb.cc
+++ b/sql/ha_innodb.cc
@@ -524,6 +524,20 @@ convert_error_code_to_mysql(
                 mark_transaction_to_rollback(thd, TRUE);
 
     		return(HA_ERR_LOCK_TABLE_FULL);
+	} else if (error == DB_TOO_MANY_CONCURRENT_TRXS) {
+
+		/* Once MySQL adds the appropriate code to errmsg.txt then
+		we can get rid of this #ifdef. NOTE: The code checked by
+		the #ifdef is the suggested name for the error condition
+		and the actual error code name could very well be different.
+		This will require some monitoring, ie. the status
+		of this request on our part.*/
+#ifdef ER_TOO_MANY_CONCURRENT_TRXS
+		return(ER_TOO_MANY_CONCURRENT_TRXS);
+#else
+		return(HA_ERR_RECORD_FILE_FULL);
+#endif
+
 	} else if (error == DB_UNSUPPORTED) {
 
 		return(HA_ERR_UNSUPPORTED);

From 6ac6c9ed4880224a3dc1f631c3d78b9b1acfc41b Mon Sep 17 00:00:00 2001
From: Timothy Smith <timothy.smith@sun.com>
Date: Mon, 2 Mar 2009 18:00:23 -0700
Subject: [PATCH 3/3] Applying InnoDB snapshot 5.0-ss4007, part 3.  Fixes

Bug #41571: MySQL segfaults after innodb recovery

This 5.0 fix will not be pushed into 5.1; a separate fix (from
innodb-5.1-ss4007) will be pushed into 5.1+.

Detailed revision comments:

r4003 | marko | 2009-01-20 16:12:50 +0200 (Tue, 20 Jan 2009) | 10 lines
branches/5.0: rec_set_nth_field(): When the field already is SQL null,
do nothing when it is being changed to SQL null. (Bug #41571)

Normally, MySQL does not pass "do-nothing" updates to the storage engine.
When it does and a column of an InnoDB table that is in ROW_FORMAT=COMPACT
is being updated from NULL to NULL, the InnoDB buffer pool will be corrupted
without this fix.

rb://81 approved by Heikki Tuuri
---
 innobase/include/rem0rec.h  | 11 ++++-------
 innobase/include/rem0rec.ic | 19 +++++++++----------
 2 files changed, 13 insertions(+), 17 deletions(-)

diff --git a/innobase/include/rem0rec.h b/innobase/include/rem0rec.h
index 69b397c9682..b573c5d4c3b 100644
--- a/innobase/include/rem0rec.h
+++ b/innobase/include/rem0rec.h
@@ -368,8 +368,9 @@ rec_set_field_extern_bits(
 /*************************************************************** 
 This is used to modify the value of an already existing field in a record.
 The previous value must have exactly the same size as the new value. If len
-is UNIV_SQL_NULL then the field is treated as an SQL null for old-style
-records. For new-style records, len must not be UNIV_SQL_NULL. */
+is UNIV_SQL_NULL then the field is treated as an SQL null.
+For records in ROW_FORMAT=COMPACT (new-style records), len must not be
+UNIV_SQL_NULL unless the field already is SQL null. */
 UNIV_INLINE
 void
 rec_set_nth_field(
@@ -378,11 +379,7 @@ rec_set_nth_field(
 	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
 	ulint		n,	/* in: index number of the field */
 	const void*	data,	/* in: pointer to the data if not SQL null */
-	ulint		len);	/* in: length of the data or UNIV_SQL_NULL.
-				If not SQL null, must have the same
-				length as the previous value.
-				If SQL null, previous value must be
-				SQL null. */
+	ulint		len);	/* in: length of the data or UNIV_SQL_NULL */
 /************************************************************** 
 The following function returns the data size of an old-style physical
 record, that is the sum of field lengths. SQL null fields
diff --git a/innobase/include/rem0rec.ic b/innobase/include/rem0rec.ic
index 1abbb503bab..64c91724386 100644
--- a/innobase/include/rem0rec.ic
+++ b/innobase/include/rem0rec.ic
@@ -1204,8 +1204,9 @@ rec_get_nth_field_size(
 /*************************************************************** 
 This is used to modify the value of an already existing field in a record.
 The previous value must have exactly the same size as the new value. If len
-is UNIV_SQL_NULL then the field is treated as an SQL null for old-style
-records. For new-style records, len must not be UNIV_SQL_NULL. */
+is UNIV_SQL_NULL then the field is treated as an SQL null.
+For records in ROW_FORMAT=COMPACT (new-style records), len must not be
+UNIV_SQL_NULL unless the field already is SQL null. */
 UNIV_INLINE
 void
 rec_set_nth_field(
@@ -1215,11 +1216,7 @@ rec_set_nth_field(
 	ulint		n,	/* in: index number of the field */
 	const void*	data,	/* in: pointer to the data
 				if not SQL null */
-	ulint		len)	/* in: length of the data or UNIV_SQL_NULL.
-				If not SQL null, must have the same
-				length as the previous value.
-				If SQL null, previous value must be
-				SQL null. */
+	ulint		len)	/* in: length of the data or UNIV_SQL_NULL */
 {
 	byte*	data2;
 	ulint	len2;
@@ -1227,9 +1224,11 @@ rec_set_nth_field(
 	ut_ad(rec);
 	ut_ad(rec_offs_validate(rec, NULL, offsets));
 
-	if (len == UNIV_SQL_NULL) {
-		ut_ad(!rec_offs_comp(offsets));
-		rec_set_nth_field_sql_null(rec, n);
+	if (UNIV_UNLIKELY(len == UNIV_SQL_NULL)) {
+		if (!rec_offs_nth_sql_null(offsets, n)) {
+			ut_a(!rec_offs_comp(offsets));
+			rec_set_nth_field_sql_null(rec, n);
+		}
 
 		return;
 	}