Bug 12635227 - 61188: DROP TABLE EXTREMELY SLOW

approved by: Marko
rb://681

Coalescing of free buf_page_t descriptors can prove to be a severe
bottleneck in the performance of compression. One workload where it
hurts badly is DROP TABLE. This patch removes buf_page_t allocations
from buf_buddy and uses ut_malloc instead.
In order to further reduce the overhead of coalescing, we no longer
attempt to coalesce a block if the corresponding free_list has fewer
than 16 entries.
This commit is contained in:
Inaam Rana 2011-06-17 16:20:20 -04:00
parent 0aa578ce13
commit b3696af2e6
11 changed files with 289 additions and 529 deletions

View File

@ -1,3 +1,10 @@
2011-06-16 The InnoDB Team
* btr/btr0cur.c, buf/buf0buddy.c, buf/buf0buf.c, buf/buf0lru.c,
include/buf0buddy.h, include/buf0buddy.ic, include/buf0buf.h,
include/buf0buf.ic, include/buf0lru.h, include/buf0types.h:
Fix Bug#61188 DROP TABLE extremely slow
2011-06-16 The InnoDB Team
* buf/buf0buddy.c, buf/buf0buf.c, buf/buf0flu.c, buf/buf0lru.c,

View File

@ -3864,7 +3864,7 @@ btr_blob_free(
&& buf_block_get_space(block) == space
&& buf_block_get_page_no(block) == page_no) {
if (buf_LRU_free_block(&block->page, all) != BUF_LRU_FREED
if (!buf_LRU_free_block(&block->page, all)
&& all && block->page.zip.data) {
/* Attempt to deallocate the uncompressed page
if the whole block cannot be deallocated. */

View File

@ -45,6 +45,14 @@ static ulint buf_buddy_n_frames;
Protected by buf_pool_mutex. */
UNIV_INTERN buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES + 1];
/** Validate a given zip_free list: run UT_LIST_VALIDATE over
buf_pool->zip_free[i] and debug-assert that every visited node is in
state BUF_BLOCK_ZIP_FREE.
NOTE(review): ut_list_node_313 is presumably the name that
UT_LIST_VALIDATE gives to the node currently being visited -- confirm
against the definition of UT_LIST_VALIDATE.
@param i	index of buf_pool->zip_free[] */
#define BUF_BUDDY_LIST_VALIDATE(i) \
UT_LIST_VALIDATE(list, buf_page_t, \
buf_pool->zip_free[i], \
ut_ad(buf_page_get_state( \
ut_list_node_313) \
== BUF_BLOCK_ZIP_FREE))
/**********************************************************************//**
Get the offset of the buddy of a compressed page frame.
@return the buddy relative of page */
@ -76,21 +84,10 @@ buf_buddy_add_to_free(
buf_page_t* bpage, /*!< in,own: block to be freed */
ulint i) /*!< in: index of buf_pool->zip_free[] */
{
#ifdef UNIV_DEBUG_VALGRIND
buf_page_t* b = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
if (b) UNIV_MEM_VALID(b, BUF_BUDDY_LOW << i);
#endif /* UNIV_DEBUG_VALGRIND */
ut_ad(buf_pool_mutex_own());
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
ut_ad(buf_pool->zip_free[i].start != bpage);
UT_LIST_ADD_FIRST(list, buf_pool->zip_free[i], bpage);
#ifdef UNIV_DEBUG_VALGRIND
if (b) UNIV_MEM_FREE(b, BUF_BUDDY_LOW << i);
UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
#endif /* UNIV_DEBUG_VALGRIND */
}
/**********************************************************************//**
@ -102,25 +99,17 @@ buf_buddy_remove_from_free(
buf_page_t* bpage, /*!< in: block to be removed */
ulint i) /*!< in: index of buf_pool->zip_free[] */
{
#ifdef UNIV_DEBUG_VALGRIND
#ifdef UNIV_DEBUG
buf_page_t* prev = UT_LIST_GET_PREV(list, bpage);
buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
if (prev) UNIV_MEM_VALID(prev, BUF_BUDDY_LOW << i);
if (next) UNIV_MEM_VALID(next, BUF_BUDDY_LOW << i);
ut_ad(!prev || buf_page_get_state(prev) == BUF_BLOCK_ZIP_FREE);
ut_ad(!next || buf_page_get_state(next) == BUF_BLOCK_ZIP_FREE);
#endif /* UNIV_DEBUG_VALGRIND */
#endif /* UNIV_DEBUG */
ut_ad(buf_pool_mutex_own());
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
UT_LIST_REMOVE(list, buf_pool->zip_free[i], bpage);
#ifdef UNIV_DEBUG_VALGRIND
if (prev) UNIV_MEM_FREE(prev, BUF_BUDDY_LOW << i);
if (next) UNIV_MEM_FREE(next, BUF_BUDDY_LOW << i);
#endif /* UNIV_DEBUG_VALGRIND */
}
/**********************************************************************//**
@ -136,17 +125,13 @@ buf_buddy_alloc_zip(
ut_ad(buf_pool_mutex_own());
ut_a(i < BUF_BUDDY_SIZES);
ut_a(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
ut_d(BUF_BUDDY_LIST_VALIDATE(i));
#ifndef UNIV_DEBUG_VALGRIND
/* Valgrind would complain about accessing free memory. */
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
ut_ad(buf_page_get_state(ut_list_node_313)
== BUF_BLOCK_ZIP_FREE)));
#endif /* !UNIV_DEBUG_VALGRIND */
bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
if (bpage) {
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
ut_a(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
buf_buddy_remove_from_free(bpage, i);
@ -165,13 +150,10 @@ buf_buddy_alloc_zip(
}
}
#ifdef UNIV_DEBUG
if (bpage) {
memset(bpage, ~i, BUF_BUDDY_LOW << i);
}
#endif /* UNIV_DEBUG */
ut_d(memset(bpage, ~i, BUF_BUDDY_LOW << i));
/* A block in slot i is BUF_BUDDY_LOW << i bytes long (the same length
the debug memset just above uses). BUF_BUDDY_SIZES is the number of
buddy sizes, not a byte count, so it must not be used as the base of
the shift here. */
UNIV_MEM_ALLOC(bpage, BUF_BUDDY_LOW << i);
}
return(bpage);
}
@ -255,6 +237,7 @@ buf_buddy_alloc_from(
{
ulint offs = BUF_BUDDY_LOW << j;
ut_ad(j <= BUF_BUDDY_SIZES);
ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
ut_ad(j >= i);
ut_ad(!ut_align_offset(buf, offs));
@ -268,13 +251,7 @@ buf_buddy_alloc_from(
bpage = (buf_page_t*) ((byte*) buf + offs);
ut_d(memset(bpage, j, BUF_BUDDY_LOW << j));
bpage->state = BUF_BLOCK_ZIP_FREE;
#ifndef UNIV_DEBUG_VALGRIND
/* Valgrind would complain about accessing free memory. */
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
ut_ad(buf_page_get_state(
ut_list_node_313)
== BUF_BLOCK_ZIP_FREE)));
#endif /* !UNIV_DEBUG_VALGRIND */
ut_d(BUF_BUDDY_LIST_VALIDATE(i));
buf_buddy_add_to_free(bpage, j);
}
@ -284,8 +261,8 @@ buf_buddy_alloc_from(
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
The buf_pool_mutex may only be released and reacquired if lru != NULL.
@return allocated block, possibly NULL if lru==NULL */
The buf_pool_mutex may be released and reacquired.
@return allocated block, never NULL */
UNIV_INTERN
void*
buf_buddy_alloc_low(
@ -294,13 +271,14 @@ buf_buddy_alloc_low(
or BUF_BUDDY_SIZES */
ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released,
or NULL if the LRU list should not be used */
and buf_pool_mutex was temporarily released */
{
buf_block_t* block;
ut_ad(lru);
ut_ad(buf_pool_mutex_own());
ut_ad(!mutex_own(&buf_pool_zip_mutex));
ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
if (i < BUF_BUDDY_SIZES) {
/* Try to allocate from the buddy system. */
@ -320,11 +298,6 @@ buf_buddy_alloc_low(
goto alloc_big;
}
if (!lru) {
return(NULL);
}
/* Try replacing an uncompressed page in the buffer pool. */
buf_pool_mutex_exit();
block = buf_LRU_get_free_block();
@ -341,65 +314,6 @@ func_exit:
return(block);
}
/**********************************************************************//**
Try to relocate the control block of a compressed page.
Only a block in state BUF_BLOCK_ZIP_PAGE is relocated; a
BUF_BLOCK_ZIP_DIRTY block is refused, and any other state is treated
as a fatal error (ut_error). The caller must hold buf_pool_mutex;
buf_pool_zip_mutex is acquired and released inside this function.
@return TRUE if relocated */
static
ibool
buf_buddy_relocate_block(
/*=====================*/
buf_page_t* bpage, /*!< in: block to relocate */
buf_page_t* dpage) /*!< in: free block to relocate to */
{
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/* Predecessor of dpage in buf_pool->zip_clean; only needed by the
debug-only re-linking step below. */
buf_page_t* b;
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
ut_ad(buf_pool_mutex_own());
/* Decide from the block state whether relocation may be attempted
at all. */
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_ZIP_FREE:
case BUF_BLOCK_NOT_USED:
case BUF_BLOCK_READY_FOR_USE:
case BUF_BLOCK_FILE_PAGE:
case BUF_BLOCK_MEMORY:
case BUF_BLOCK_REMOVE_HASH:
/* None of these states is expected for a block passed to
this function. */
ut_error;
case BUF_BLOCK_ZIP_DIRTY:
/* Cannot relocate dirty pages. */
return(FALSE);
case BUF_BLOCK_ZIP_PAGE:
break;
}
/* The relocatability check and the relocation itself are both done
while holding buf_pool_zip_mutex. */
mutex_enter(&buf_pool_zip_mutex);
if (!buf_page_can_relocate(bpage)) {
mutex_exit(&buf_pool_zip_mutex);
return(FALSE);
}
buf_relocate(bpage, dpage);
/* Debug builds only: flag the old descriptor as free. */
ut_d(bpage->state = BUF_BLOCK_ZIP_FREE);
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/* relocate buf_pool->zip_clean */
/* Unlink dpage and re-insert it at the same position (after its
predecessor b, or at the head when there is none).
NOTE(review): presumably this makes the neighbouring nodes point at
dpage instead of bpage after buf_relocate() -- confirm. */
b = UT_LIST_GET_PREV(list, dpage);
UT_LIST_REMOVE(list, buf_pool->zip_clean, dpage);
if (b) {
UT_LIST_INSERT_AFTER(list, buf_pool->zip_clean, b, dpage);
} else {
UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
}
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
/* The old descriptor's contents must no longer be relied upon. */
UNIV_MEM_INVALID(bpage, sizeof *bpage);
mutex_exit(&buf_pool_zip_mutex);
return(TRUE);
}
/**********************************************************************//**
Try to relocate a block.
@return TRUE if relocated */
@ -414,28 +328,25 @@ buf_buddy_relocate(
buf_page_t* bpage;
const ulint size = BUF_BUDDY_LOW << i;
ullint usec = ut_time_us(NULL);
mutex_t* mutex;
ulint space;
ulint page_no;
ut_ad(buf_pool_mutex_own());
ut_ad(!mutex_own(&buf_pool_zip_mutex));
ut_ad(!ut_align_offset(src, size));
ut_ad(!ut_align_offset(dst, size));
ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
UNIV_MEM_ASSERT_W(dst, size);
/* We assume that all memory from buf_buddy_alloc()
is used for either compressed pages or buf_page_t
objects covering compressed pages. */
is used for compressed page frames. */
/* We look inside the allocated objects returned by
buf_buddy_alloc() and assume that anything of
PAGE_ZIP_MIN_SIZE or larger is a compressed page that contains
a valid space_id and page_no in the page header. Should the
fields be invalid, we will be unable to relocate the block.
We also assume that anything that fits sizeof(buf_page_t)
actually is a properly initialized buf_page_t object. */
if (size >= PAGE_ZIP_MIN_SIZE) {
/* This is a compressed page. */
mutex_t* mutex;
buf_buddy_alloc() and assume that each block is a compressed
page that contains a valid space_id and page_no in the page
header. Should the fields be invalid, we will be unable to
relocate the block. */
/* The src block may be split into smaller blocks,
some of which may be free. Thus, the
@ -445,10 +356,10 @@ buf_buddy_relocate(
pool), so there is nothing wrong about this. The
mach_read_from_4() calls here will only trigger bogus
Valgrind memcheck warnings in UNIV_DEBUG_VALGRIND builds. */
ulint space = mach_read_from_4(
(const byte*) src + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
ulint page_no = mach_read_from_4(
(const byte*) src + FIL_PAGE_OFFSET);
space = mach_read_from_4((const byte *) src
+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
page_no = mach_read_from_4((const byte *) src
+ FIL_PAGE_OFFSET);
/* Suppress Valgrind warnings about conditional jump
on uninitialized value. */
UNIV_MEM_VALID(&space, sizeof space);
@ -487,7 +398,6 @@ buf_buddy_relocate(
memcpy(dst, src, size);
bpage->zip.data = dst;
mutex_exit(mutex);
success:
UNIV_MEM_INVALID(src, size);
{
buf_buddy_stat_t* buddy_stat
@ -500,19 +410,6 @@ success:
}
mutex_exit(mutex);
} else if (i == buf_buddy_get_slot(sizeof(buf_page_t))) {
/* This must be a buf_page_t object. */
#if UNIV_WORD_SIZE == 4
/* On 32-bit systems, there is no padding in
buf_page_t. On other systems, Valgrind could complain
about uninitialized pad bytes. */
UNIV_MEM_ASSERT_RW(src, size);
#endif
if (buf_buddy_relocate_block(src, dst)) {
goto success;
}
}
return(FALSE);
}
@ -534,12 +431,14 @@ buf_buddy_free_low(
ut_ad(buf_pool_mutex_own());
ut_ad(!mutex_own(&buf_pool_zip_mutex));
ut_ad(i <= BUF_BUDDY_SIZES);
ut_ad(i >= buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE));
ut_ad(buf_buddy_stat[i].used > 0);
buf_buddy_stat[i].used--;
recombine:
UNIV_MEM_ASSERT_AND_ALLOC(buf, BUF_BUDDY_LOW << i);
ut_d(((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE);
((buf_page_t*) buf)->state = BUF_BLOCK_ZIP_FREE;
if (i == BUF_BUDDY_SIZES) {
buf_buddy_block_free(buf);
@ -550,32 +449,36 @@ recombine:
ut_ad(buf == ut_align_down(buf, BUF_BUDDY_LOW << i));
ut_ad(!buf_pool_contains_zip(buf));
/* Try to combine adjacent blocks. */
/* Do not recombine blocks if there are few free blocks.
We may waste up to 15360*max_len bytes to free blocks
(1024 + 2048 + 4096 + 8192 = 15360) */
if (UT_LIST_GET_LEN(buf_pool->zip_free[i]) < 16) {
goto func_exit;
}
/* Try to combine adjacent blocks. */
buddy = (buf_page_t*) buf_buddy_get(((byte*) buf), BUF_BUDDY_LOW << i);
#ifndef UNIV_DEBUG_VALGRIND
/* Valgrind would complain about accessing free memory. */
/* When Valgrind instrumentation is not enabled, we can read
buddy->state to quickly determine that a block is not free.
When the block is not free, buddy->state belongs to a compressed
page frame that may be flagged uninitialized in our Valgrind
instrumentation. */
if (buddy->state != BUF_BLOCK_ZIP_FREE) {
goto buddy_nonfree;
}
/* The field buddy->state can only be trusted for free blocks.
If buddy->state == BUF_BLOCK_ZIP_FREE, the block is free if
it is in the free list. */
#endif /* !UNIV_DEBUG_VALGRIND */
for (bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]); bpage; ) {
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
ut_ad(buf_page_get_state(bpage) == BUF_BLOCK_ZIP_FREE);
if (bpage == buddy) {
buddy_free:
/* The buddy is free: recombine */
buf_buddy_remove_from_free(bpage, i);
buddy_free2:
buddy_is_free:
ut_ad(buf_page_get_state(buddy) == BUF_BLOCK_ZIP_FREE);
ut_ad(!buf_pool_contains_zip(buddy));
i++;
@ -585,122 +488,43 @@ buddy_free2:
}
ut_a(bpage != buf);
{
buf_page_t* next = UT_LIST_GET_NEXT(list, bpage);
UNIV_MEM_ASSERT_AND_FREE(bpage, BUF_BUDDY_LOW << i);
bpage = next;
}
UNIV_MEM_ASSERT_W(bpage, BUF_BUDDY_LOW << i);
bpage = UT_LIST_GET_NEXT(list, bpage);
}
#ifndef UNIV_DEBUG_VALGRIND
buddy_nonfree:
/* Valgrind would complain about accessing free memory. */
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
ut_ad(buf_page_get_state(ut_list_node_313)
== BUF_BLOCK_ZIP_FREE)));
#endif /* UNIV_DEBUG_VALGRIND */
#endif /* !UNIV_DEBUG_VALGRIND */
ut_d(BUF_BUDDY_LIST_VALIDATE(i));
/* The buddy is not free. Is there a free block of this size? */
bpage = UT_LIST_GET_FIRST(buf_pool->zip_free[i]);
if (bpage) {
/* Remove the block from the free list, because a successful
buf_buddy_relocate() will overwrite bpage->list. */
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
buf_buddy_remove_from_free(bpage, i);
/* Try to relocate the buddy of buf to the free block. */
if (buf_buddy_relocate(buddy, bpage, i)) {
ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
goto buddy_free2;
buddy->state = BUF_BLOCK_ZIP_FREE;
goto buddy_is_free;
}
buf_buddy_add_to_free(bpage, i);
/* Try to relocate the buddy of the free block to buf. */
buddy = (buf_page_t*) buf_buddy_get(((byte*) bpage),
BUF_BUDDY_LOW << i);
#ifndef UNIV_DEBUG_VALGRIND
/* Valgrind would complain about accessing free memory. */
/* The buddy must not be (completely) free, because we
always recombine adjacent free blocks.
(Parts of the buddy can be free in
buf_pool->zip_free[j] with j < i.) */
ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->zip_free[i],
ut_ad(buf_page_get_state(
ut_list_node_313)
== BUF_BLOCK_ZIP_FREE
&& ut_list_node_313 != buddy)));
#endif /* !UNIV_DEBUG_VALGRIND */
if (buf_buddy_relocate(buddy, buf, i)) {
buf = bpage;
UNIV_MEM_VALID(bpage, BUF_BUDDY_LOW << i);
ut_d(buddy->state = BUF_BLOCK_ZIP_FREE);
goto buddy_free;
}
}
func_exit:
/* Free the block to the buddy list. */
bpage = buf;
#ifdef UNIV_DEBUG
if (i < buf_buddy_get_slot(PAGE_ZIP_MIN_SIZE)) {
/* This area has most likely been allocated for at
least one compressed-only block descriptor. Check
that there are no live objects in the area. This is
not a complete check: it may yield false positives as
well as false negatives. Also, due to buddy blocks
being recombined, it is possible (although unlikely)
that this branch is never reached. */
char* c;
# ifndef UNIV_DEBUG_VALGRIND
/* Valgrind would complain about accessing
uninitialized memory. Besides, Valgrind performs a
more exhaustive check, at every memory access. */
const buf_page_t* b = buf;
const buf_page_t* const b_end = (buf_page_t*)
((char*) b + (BUF_BUDDY_LOW << i));
for (; b < b_end; b++) {
/* Avoid false positives (and cause false
negatives) by checking for b->space < 1000. */
if ((b->state == BUF_BLOCK_ZIP_PAGE
|| b->state == BUF_BLOCK_ZIP_DIRTY)
&& b->space > 0 && b->space < 1000) {
fprintf(stderr,
"buddy dirty %p %u (%u,%u) %p,%lu\n",
(void*) b,
b->state, b->space, b->offset,
buf, i);
}
}
# endif /* !UNIV_DEBUG_VALGRIND */
/* Scramble the block. This should make any pointers
invalid and trigger a segmentation violation. Because
the scrambling can be reversed, it may be possible to
track down the object pointing to the freed data by
dereferencing the unscrambled bpage->LRU or
bpage->list pointers. */
for (c = (char*) buf + (BUF_BUDDY_LOW << i);
c-- > (char*) buf; ) {
*c = ~*c ^ i;
}
} else {
/* Fill large blocks with a constant pattern. */
memset(bpage, i, BUF_BUDDY_LOW << i);
}
#endif /* UNIV_DEBUG */
ut_d(memset(bpage, i, BUF_BUDDY_LOW << i));
UNIV_MEM_INVALID(bpage, BUF_BUDDY_LOW << i);
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_add_to_free(bpage, i);
}

View File

@ -1358,7 +1358,7 @@ err_exit:
mutex_enter(block_mutex);
/* Discard the uncompressed page frame if possible. */
if (buf_LRU_free_block(bpage, FALSE) == BUF_LRU_FREED) {
if (buf_LRU_free_block(bpage, FALSE)) {
mutex_exit(block_mutex);
goto lookup;
@ -1699,13 +1699,8 @@ loop:
if (block) {
/* If the guess is a compressed page descriptor that
has been allocated by buf_buddy_alloc(), it may have
been invalidated by buf_buddy_relocate(). In that
case, block could point to something that happens to
contain the expected bits in block->page. Similarly,
the guess may be pointing to a buffer pool chunk that
has been released when resizing the buffer pool. */
has been allocated by buf_page_alloc_descriptor(),
it may have been freed by buf_relocate(). */
if (!buf_block_is_uncompressed(block)
|| offset != block->page.offset
|| space != block->page.space
@ -1889,11 +1884,10 @@ wait_until_unfixed:
mutex_exit(&buf_pool_zip_mutex);
buf_pool->n_pend_unzip++;
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_free(bpage, sizeof *bpage);
buf_pool_mutex_exit();
buf_page_free_descriptor(bpage);
/* Decompress the page and apply buffered operations
while not holding buf_pool_mutex or block->mutex. */
success = buf_zip_decompress(block, srv_use_checksums);
@ -1937,7 +1931,7 @@ wait_until_unfixed:
/* Try to evict the block from the buffer pool, to use the
insert buffer as much as possible. */
if (buf_LRU_free_block(&block->page, TRUE) == BUF_LRU_FREED) {
if (buf_LRU_free_block(&block->page, TRUE)) {
buf_pool_mutex_exit();
mutex_exit(&block->mutex);
fprintf(stderr,
@ -2551,17 +2545,12 @@ err_exit:
mutex_exit(&block->mutex);
} else {
/* Defer buf_buddy_alloc() until after the block has
been found not to exist. The buf_buddy_alloc() and
buf_buddy_free() calls may be expensive because of
buf_buddy_relocate(). */
/* The compressed page must be allocated before the
control block (bpage), in order to avoid the
invocation of buf_buddy_relocate_block() on
uninitialized data. */
data = buf_buddy_alloc(zip_size, &lru);
bpage = buf_buddy_alloc(sizeof *bpage, &lru);
/* If buf_buddy_alloc() allocated storage from the LRU list,
it released and reacquired buf_pool_mutex. Thus, we must
@ -2569,15 +2558,13 @@ err_exit:
if (UNIV_UNLIKELY(lru)
&& UNIV_LIKELY_NULL(buf_page_hash_get(space, offset))) {
/* The block was added by some other thread. */
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_free(bpage, sizeof *bpage);
buf_buddy_free(data, zip_size);
bpage = NULL;
goto func_exit;
}
bpage = buf_page_alloc_descriptor();
page_zip_des_init(&bpage->zip);
page_zip_set_size(&bpage->zip, zip_size);
bpage->zip.data = data;

View File

@ -355,7 +355,7 @@ scan_again:
while (bpage != NULL) {
buf_page_t* prev_bpage;
ibool prev_bpage_buf_fix = FALSE;
mutex_t* block_mutex = NULL;
ut_a(buf_page_in_file(bpage));
@ -368,18 +368,21 @@ scan_again:
if (buf_page_get_space(bpage) != id) {
/* Skip this block, as it does not belong to
the space that is being invalidated. */
goto next_page;
} else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
/* We cannot remove this page during this scan
yet; maybe the system is currently reading it
in, or flushing the modifications to the file */
all_freed = FALSE;
goto next_page;
} else {
mutex_t* block_mutex = buf_page_get_mutex(bpage);
block_mutex = buf_page_get_mutex(bpage);
mutex_enter(block_mutex);
if (bpage->buf_fix_count > 0) {
mutex_exit(block_mutex);
/* We cannot remove this page during
this scan yet; maybe the system is
currently reading it in, or flushing
@ -389,6 +392,9 @@ scan_again:
goto next_page;
}
}
ut_ad(mutex_own(block_mutex));
#ifdef UNIV_DEBUG
if (buf_debug_prints) {
@ -400,37 +406,7 @@ scan_again:
#endif
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
/* This is a compressed-only block
descriptor. Ensure that prev_bpage
cannot be relocated when bpage is freed. */
if (UNIV_LIKELY(prev_bpage != NULL)) {
switch (buf_page_get_state(
prev_bpage)) {
case BUF_BLOCK_FILE_PAGE:
/* Descriptors of uncompressed
blocks will not be relocated,
because we are holding the
buf_pool_mutex. */
break;
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
/* Descriptors of compressed-
only blocks can be relocated,
unless they are buffer-fixed.
Because both bpage and
prev_bpage are protected by
buf_pool_zip_mutex, it is
not necessary to acquire
further mutexes. */
ut_ad(&buf_pool_zip_mutex
== block_mutex);
ut_ad(mutex_own(block_mutex));
prev_bpage_buf_fix = TRUE;
prev_bpage->buf_fix_count++;
break;
default:
ut_error;
}
}
descriptor. Do nothing. */
} else if (((buf_block_t*) bpage)->is_hashed) {
ulint page_no;
ulint zip_size;
@ -459,36 +435,16 @@ scan_again:
if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
!= BUF_BLOCK_ZIP_FREE) {
buf_LRU_block_free_hashed_page((buf_block_t*)
bpage);
buf_LRU_block_free_hashed_page((buf_block_t*) bpage);
mutex_exit(block_mutex);
} else {
/* The block_mutex should have been
released by buf_LRU_block_remove_hashed_page()
when it returns BUF_BLOCK_ZIP_FREE. */
/* The block_mutex should have been released
by buf_LRU_block_remove_hashed_page() when it
returns BUF_BLOCK_ZIP_FREE. */
ut_ad(block_mutex == &buf_pool_zip_mutex);
ut_ad(!mutex_own(block_mutex));
if (prev_bpage_buf_fix) {
/* We temporarily buffer-fixed
prev_bpage, so that
buf_buddy_free() could not
relocate it, in case it was a
compressed-only block
descriptor. */
mutex_enter(block_mutex);
ut_ad(prev_bpage->buf_fix_count > 0);
prev_bpage->buf_fix_count--;
mutex_exit(block_mutex);
}
goto next_page_no_mutex;
}
next_page:
mutex_exit(block_mutex);
}
next_page_no_mutex:
bpage = prev_bpage;
}
@ -574,7 +530,7 @@ buf_LRU_free_from_unzip_LRU_list(
UNIV_LIKELY(block != NULL) && UNIV_LIKELY(distance > 0);
block = UT_LIST_GET_PREV(unzip_LRU, block), distance--) {
enum buf_lru_free_block_status freed;
ibool freed;
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_ad(block->in_unzip_LRU_list);
@ -584,24 +540,9 @@ buf_LRU_free_from_unzip_LRU_list(
freed = buf_LRU_free_block(&block->page, FALSE);
mutex_exit(&block->mutex);
switch (freed) {
case BUF_LRU_FREED:
if (freed) {
return(TRUE);
case BUF_LRU_CANNOT_RELOCATE:
/* If we failed to relocate, try
regular LRU eviction. */
return(FALSE);
case BUF_LRU_NOT_FREED:
/* The block was buffer-fixed or I/O-fixed.
Keep looking. */
continue;
}
/* inappropriate return value from
buf_LRU_free_block() */
ut_error;
}
return(FALSE);
@ -632,10 +573,9 @@ buf_LRU_free_from_common_LRU_list(
UNIV_LIKELY(bpage != NULL) && UNIV_LIKELY(distance > 0);
bpage = UT_LIST_GET_PREV(LRU, bpage), distance--) {
enum buf_lru_free_block_status freed;
ibool freed;
unsigned accessed;
mutex_t* block_mutex
= buf_page_get_mutex(bpage);
mutex_t* block_mutex = buf_page_get_mutex(bpage);
ut_ad(buf_page_in_file(bpage));
ut_ad(bpage->in_LRU_list);
@ -645,8 +585,7 @@ buf_LRU_free_from_common_LRU_list(
freed = buf_LRU_free_block(bpage, TRUE);
mutex_exit(block_mutex);
switch (freed) {
case BUF_LRU_FREED:
if (freed) {
/* Keep track of pages that are evicted without
ever being accessed. This gives us a measure of
the effectiveness of readahead */
@ -654,21 +593,7 @@ buf_LRU_free_from_common_LRU_list(
++buf_pool->stat.n_ra_pages_evicted;
}
return(TRUE);
case BUF_LRU_NOT_FREED:
/* The block was dirty, buffer-fixed, or I/O-fixed.
Keep looking. */
continue;
case BUF_LRU_CANNOT_RELOCATE:
/* This should never occur, because we
want to discard the compressed page too. */
break;
}
/* inappropriate return value from
buf_LRU_free_block() */
ut_error;
}
return(FALSE);
@ -1350,17 +1275,16 @@ buf_LRU_make_block_old(
Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well.
NOTE: If this function returns BUF_LRU_FREED, it will temporarily
NOTE: If this function returns TRUE, it will temporarily
release buf_pool_mutex. Furthermore, the page frame will no longer be
accessible via bpage.
The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
release these two mutexes after the call. No other
buf_page_get_mutex() may be held when calling this function.
@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
BUF_LRU_NOT_FREED otherwise. */
@return TRUE if freed, FALSE otherwise. */
UNIV_INTERN
enum buf_lru_free_block_status
ibool
buf_LRU_free_block(
/*===============*/
buf_page_t* bpage, /*!< in: block to be freed */
@ -1385,7 +1309,7 @@ buf_LRU_free_block(
if (!buf_page_can_relocate(bpage)) {
/* Do not free buffer-fixed or I/O-fixed blocks. */
return(BUF_LRU_NOT_FREED);
return(FALSE);
}
#ifdef UNIV_IBUF_COUNT_DEBUG
@ -1397,7 +1321,7 @@ buf_LRU_free_block(
/* Do not completely free dirty blocks. */
if (bpage->oldest_modification) {
return(BUF_LRU_NOT_FREED);
return(FALSE);
}
} else if (bpage->oldest_modification) {
/* Do not completely free dirty blocks. */
@ -1405,7 +1329,7 @@ buf_LRU_free_block(
if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
ut_ad(buf_page_get_state(bpage)
== BUF_BLOCK_ZIP_DIRTY);
return(BUF_LRU_NOT_FREED);
return(FALSE);
}
goto alloc;
@ -1414,14 +1338,8 @@ buf_LRU_free_block(
If it cannot be allocated (without freeing a block
from the LRU list), refuse to free bpage. */
alloc:
buf_pool_mutex_exit_forbid();
b = buf_buddy_alloc(sizeof *b, NULL);
buf_pool_mutex_exit_allow();
if (UNIV_UNLIKELY(!b)) {
return(BUF_LRU_CANNOT_RELOCATE);
}
b = buf_page_alloc_descriptor();
ut_a(b);
memcpy(b, bpage, sizeof *b);
}
@ -1589,7 +1507,7 @@ alloc:
mutex_enter(block_mutex);
}
return(BUF_LRU_FREED);
return(TRUE);
}
/******************************************************************//**
@ -1809,10 +1727,8 @@ buf_LRU_block_remove_hashed_page(
buf_pool_mutex_exit_forbid();
buf_buddy_free(bpage->zip.data,
page_zip_get_size(&bpage->zip));
bpage->state = BUF_BLOCK_ZIP_FREE;
buf_buddy_free(bpage, sizeof(*bpage));
buf_pool_mutex_exit_allow();
UNIV_MEM_UNDESC(bpage);
buf_page_free_descriptor(bpage);
return(BUF_BLOCK_ZIP_FREE);
case BUF_BLOCK_FILE_PAGE:

View File

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -37,24 +37,19 @@ Created December 2006 by Marko Makela
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any
block->mutex. The buf_pool_mutex may only be released and reacquired
if lru != NULL. This function should only be used for allocating
compressed page frames or control blocks (buf_page_t). Allocated
control blocks must be properly initialized immediately after
buf_buddy_alloc() has returned the memory, before releasing
buf_pool_mutex.
@return allocated block, possibly NULL if lru == NULL */
block->mutex. The buf_pool_mutex may be released and reacquired.
This function should only be used for allocating compressed page frames.
@return allocated block, never NULL */
UNIV_INLINE
void*
buf_buddy_alloc(
/*============*/
ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
ulint size, /*!< in: compressed page size
(between PAGE_ZIP_MIN_SIZE and UNIV_PAGE_SIZE) */
ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released,
or NULL if the LRU list should not be used */
__attribute__((malloc));
and buf_pool_mutex was temporarily released */
__attribute__((malloc, nonnull));
/**********************************************************************//**
Release a block. */
UNIV_INLINE

View File

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 2006, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -36,8 +36,8 @@ Created December 2006 by Marko Makela
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
The buf_pool_mutex may only be released and reacquired if lru != NULL.
@return allocated block, possibly NULL if lru==NULL */
The buf_pool_mutex may be released and reacquired.
@return allocated block, never NULL */
UNIV_INTERN
void*
buf_buddy_alloc_low(
@ -46,9 +46,8 @@ buf_buddy_alloc_low(
or BUF_BUDDY_SIZES */
ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released,
or NULL if the LRU list should not be used */
__attribute__((malloc));
and buf_pool_mutex was temporarily released */
__attribute__((malloc, nonnull));
/**********************************************************************//**
Deallocate a block. */
@ -74,6 +73,8 @@ buf_buddy_get_slot(
ulint i;
ulint s;
ut_ad(size >= PAGE_ZIP_MIN_SIZE);
for (i = 0, s = BUF_BUDDY_LOW; s < size; i++, s <<= 1) {
}
@ -84,26 +85,25 @@ buf_buddy_get_slot(
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
buf_pool_mutex and must not hold buf_pool_zip_mutex or any
block->mutex. The buf_pool_mutex may only be released and reacquired
if lru != NULL. This function should only be used for allocating
compressed page frames or control blocks (buf_page_t). Allocated
control blocks must be properly initialized immediately after
buf_buddy_alloc() has returned the memory, before releasing
buf_pool_mutex.
@return allocated block, possibly NULL if lru == NULL */
block->mutex. The buf_pool_mutex may be released and reacquired.
This function should only be used for allocating compressed page frames.
@return allocated block, never NULL */
UNIV_INLINE
void*
buf_buddy_alloc(
/*============*/
ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
ulint size, /*!< in: compressed page size
(between PAGE_ZIP_MIN_SIZE and UNIV_PAGE_SIZE) */
ibool* lru) /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
and buf_pool_mutex was temporarily released,
or NULL if the LRU list should not be used */
and buf_pool_mutex was temporarily released */
{
ut_ad(buf_pool_mutex_own());
ut_ad(ut_is_2pow(size));
ut_ad(size >= PAGE_ZIP_MIN_SIZE);
ut_ad(size <= UNIV_PAGE_SIZE);
return(buf_buddy_alloc_low(buf_buddy_get_slot(size), lru));
return((byte*) buf_buddy_alloc_low(buf_buddy_get_slot(size), lru));
}
/**********************************************************************//**
@ -117,6 +117,9 @@ buf_buddy_free(
ulint size) /*!< in: block size, up to UNIV_PAGE_SIZE */
{
ut_ad(buf_pool_mutex_own());
ut_ad(ut_is_2pow(size));
ut_ad(size >= PAGE_ZIP_MIN_SIZE);
ut_ad(size <= UNIV_PAGE_SIZE);
buf_buddy_free_low(buf, buf_buddy_get_slot(size));
}

View File

@ -156,6 +156,23 @@ UNIV_INLINE
ib_uint64_t
buf_pool_get_oldest_modification(void);
/*==================================*/
/********************************************************************//**
Allocates a buf_page_t descriptor. This function must succeed. In case
of failure we assert in this function.
@return the allocated descriptor */
UNIV_INLINE
buf_page_t*
buf_page_alloc_descriptor(void)
/*===========================*/
__attribute__((malloc));
/********************************************************************//**
Free a buf_page_t descriptor. The argument is declared nonnull, so
callers must not pass NULL. */
UNIV_INLINE
void
buf_page_free_descriptor(
/*=====================*/
buf_page_t* bpage) /*!< in: bpage descriptor to free. */
__attribute__((nonnull));
/********************************************************************//**
Allocates a buffer block.
@return own: the allocated block, in state BUF_BLOCK_MEMORY */

View File

@ -714,6 +714,35 @@ buf_block_get_lock_hash_val(
return(block->lock_hash_val);
}
/********************************************************************//**
Allocates storage for one buf_page_t descriptor with ut_malloc().
This function must succeed; no NULL check is performed here.
NOTE(review): the "must succeed" contract relies on ut_malloc() not
returning on allocation failure -- confirm against its definition.
@return the allocated descriptor */
UNIV_INLINE
buf_page_t*
buf_page_alloc_descriptor(void)
/*===========================*/
{
	buf_page_t*	desc = (buf_page_t*) ut_malloc(sizeof(buf_page_t));

	/* Debug builds only: start from an all-zero descriptor. */
	ut_d(memset(desc, 0, sizeof(buf_page_t)));

	/* Memory-checker instrumentation for the fresh allocation. */
	UNIV_MEM_ALLOC(desc, sizeof(buf_page_t));

	return(desc);
}
/********************************************************************//**
Free a buf_page_t descriptor by handing it to ut_free(). */
UNIV_INLINE
void
buf_page_free_descriptor(
/*=====================*/
buf_page_t* bpage) /*!< in: bpage descriptor to free. */
{
ut_free(bpage);
}
/********************************************************************//**
Allocates a buffer block.
@return own: the allocated block, in state BUF_BLOCK_MEMORY */

View File

@ -30,18 +30,6 @@ Created 11/5/1995 Heikki Tuuri
#include "ut0byte.h"
#include "buf0types.h"
/** The return type of buf_LRU_free_block().
Note that the success value BUF_LRU_FREED is 0, so a result must be
compared against BUF_LRU_FREED explicitly; testing it for truth would
select the two "not freed" values instead. */
enum buf_lru_free_block_status {
/** freed */
BUF_LRU_FREED = 0,
/** not freed because the caller asked to remove the
uncompressed frame but the control block cannot be
relocated */
BUF_LRU_CANNOT_RELOCATE,
/** not freed because of some other reason */
BUF_LRU_NOT_FREED
};
/******************************************************************//**
Tries to remove LRU flushed blocks from the end of the LRU list and put them
to the free list. This is beneficial for the efficiency of the insert buffer
@ -98,17 +86,16 @@ buf_LRU_insert_zip_clean(
Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well.
NOTE: If this function returns BUF_LRU_FREED, it will temporarily
NOTE: If this function returns TRUE, it will temporarily
release buf_pool_mutex. Furthermore, the page frame will no longer be
accessible via bpage.
The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
release these two mutexes after the call. No other
buf_page_get_mutex() may be held when calling this function.
@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
BUF_LRU_NOT_FREED otherwise. */
@return TRUE if freed, FALSE otherwise. */
UNIV_INTERN
enum buf_lru_free_block_status
ibool
buf_LRU_free_block(
/*===============*/
buf_page_t* bpage, /*!< in: block to be freed */

View File

@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@ -26,6 +26,8 @@ Created 11/17/1995 Heikki Tuuri
#ifndef buf0types_h
#define buf0types_h
#include "page0types.h"
/** Buffer page (uncompressed or compressed) */
typedef struct buf_page_struct buf_page_t;
/** Buffer block for which an uncompressed page exists */
@ -58,17 +60,10 @@ enum buf_io_fix {
/** Parameters of binary buddy system for compressed pages (buf0buddy.h) */
/* @{ */
#if UNIV_WORD_SIZE <= 4 /* 32-bit system */
/** Base-2 logarithm of the smallest buddy block size */
# define BUF_BUDDY_LOW_SHIFT 6
#else /* 64-bit system */
/** Base-2 logarithm of the smallest buddy block size */
# define BUF_BUDDY_LOW_SHIFT 7
#endif
#define BUF_BUDDY_LOW_SHIFT PAGE_ZIP_MIN_SIZE_SHIFT
#define BUF_BUDDY_LOW (1 << BUF_BUDDY_LOW_SHIFT)
/*!< minimum block size in the binary
buddy system; must be at least
sizeof(buf_page_t) */
#define BUF_BUDDY_SIZES (UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
/*!< number of buddy sizes */